1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
5
6define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
7; GCN-LABEL: dyn_extract_v8f32_const_s_v:
8; GCN:       ; %bb.0: ; %entry
9; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
11; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
12; GCN-NEXT:    v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
13; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
14; GCN-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
15; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
16; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
17; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
18; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
19; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
20; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
21; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
22; GCN-NEXT:    v_mov_b32_e32 v4, 0x40e00000
23; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
24; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
25; GCN-NEXT:    v_mov_b32_e32 v5, 0x41000000
26; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
27; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
28; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
29; GCN-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX10-LABEL: dyn_extract_v8f32_const_s_v:
32; GFX10:       ; %bb.0: ; %entry
33; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
35; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
36; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
37; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
38; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
39; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
40; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
41; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
42; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
43; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
44; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
45; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
46; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
47; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
48; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
49; GFX10-NEXT:    s_setpc_b64 s[30:31]
50entry:
51  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
52  ret float %ext
53}
54
55define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
56; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
57; GPRIDX:       ; %bb.0: ; %entry
58; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 1
59; GPRIDX-NEXT:    s_cselect_b32 s0, 2.0, 1.0
60; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 2
61; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40400000, s0
62; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 3
63; GPRIDX-NEXT:    s_cselect_b32 s0, 4.0, s0
64; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 4
65; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40a00000, s0
66; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 5
67; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40c00000, s0
68; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 6
69; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40e00000, s0
70; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 7
71; GPRIDX-NEXT:    s_cselect_b32 s0, 0x41000000, s0
72; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
73; GPRIDX-NEXT:    ; return to shader part epilog
74;
75; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
76; MOVREL:       ; %bb.0: ; %entry
77; MOVREL-NEXT:    s_mov_b32 s4, 1.0
78; MOVREL-NEXT:    s_mov_b32 m0, s2
79; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
80; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
81; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
82; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
83; MOVREL-NEXT:    s_mov_b32 s7, 4.0
84; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
85; MOVREL-NEXT:    s_mov_b32 s5, 2.0
86; MOVREL-NEXT:    s_movrels_b32 s0, s4
87; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
88; MOVREL-NEXT:    ; return to shader part epilog
89;
90; GFX10-LABEL: dyn_extract_v8f32_const_s_s:
91; GFX10:       ; %bb.0: ; %entry
92; GFX10-NEXT:    s_mov_b32 s4, 1.0
93; GFX10-NEXT:    s_mov_b32 m0, s2
94; GFX10-NEXT:    s_mov_b32 s11, 0x41000000
95; GFX10-NEXT:    s_mov_b32 s10, 0x40e00000
96; GFX10-NEXT:    s_mov_b32 s9, 0x40c00000
97; GFX10-NEXT:    s_mov_b32 s8, 0x40a00000
98; GFX10-NEXT:    s_mov_b32 s7, 4.0
99; GFX10-NEXT:    s_mov_b32 s6, 0x40400000
100; GFX10-NEXT:    s_mov_b32 s5, 2.0
101; GFX10-NEXT:    s_movrels_b32 s0, s4
102; GFX10-NEXT:    v_mov_b32_e32 v0, s0
103; GFX10-NEXT:    ; return to shader part epilog
104entry:
105  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
106  ret float %ext
107}
108
109define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
110; GCN-LABEL: dyn_extract_v8f32_s_v:
111; GCN:       ; %bb.0: ; %entry
112; GCN-NEXT:    s_mov_b32 s0, s2
113; GCN-NEXT:    s_mov_b32 s1, s3
114; GCN-NEXT:    s_mov_b32 s2, s4
115; GCN-NEXT:    v_mov_b32_e32 v1, s0
116; GCN-NEXT:    v_mov_b32_e32 v2, s1
117; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
118; GCN-NEXT:    s_mov_b32 s3, s5
119; GCN-NEXT:    v_mov_b32_e32 v3, s2
120; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
121; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
122; GCN-NEXT:    v_mov_b32_e32 v4, s3
123; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
124; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
125; GCN-NEXT:    v_mov_b32_e32 v5, s6
126; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
127; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
128; GCN-NEXT:    v_mov_b32_e32 v6, s7
129; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
130; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
131; GCN-NEXT:    v_mov_b32_e32 v7, s8
132; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
133; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
134; GCN-NEXT:    v_mov_b32_e32 v8, s9
135; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
136; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
137; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v8, vcc
138; GCN-NEXT:    ; return to shader part epilog
139;
140; GFX10-LABEL: dyn_extract_v8f32_s_v:
141; GFX10:       ; %bb.0: ; %entry
142; GFX10-NEXT:    s_mov_b32 s1, s3
143; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
144; GFX10-NEXT:    v_mov_b32_e32 v1, s1
145; GFX10-NEXT:    s_mov_b32 s0, s2
146; GFX10-NEXT:    s_mov_b32 s2, s4
147; GFX10-NEXT:    s_mov_b32 s3, s5
148; GFX10-NEXT:    s_mov_b32 s4, s6
149; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
150; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
151; GFX10-NEXT:    s_mov_b32 s5, s7
152; GFX10-NEXT:    s_mov_b32 s6, s8
153; GFX10-NEXT:    s_mov_b32 s7, s9
154; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
155; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
156; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
157; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
158; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
159; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
160; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
161; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
162; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
163; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
164; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s7, vcc_lo
165; GFX10-NEXT:    ; return to shader part epilog
166entry:
167  %ext = extractelement <8 x float> %vec, i32 %sel
168  ret float %ext
169}
170
171define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
172; GCN-LABEL: dyn_extract_v8f32_v_v:
173; GCN:       ; %bb.0: ; %entry
174; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
176; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
177; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
178; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
179; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
180; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
181; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
182; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
183; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
184; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
185; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
186; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
187; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
188; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
189; GCN-NEXT:    s_setpc_b64 s[30:31]
190;
191; GFX10-LABEL: dyn_extract_v8f32_v_v:
192; GFX10:       ; %bb.0: ; %entry
193; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
195; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
196; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
197; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
198; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
199; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
200; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
201; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
202; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
203; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
204; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
205; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
206; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
207; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
208; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
209; GFX10-NEXT:    s_setpc_b64 s[30:31]
210entry:
211  %ext = extractelement <8 x float> %vec, i32 %sel
212  ret float %ext
213}
214
215define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
216; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
217; GPRIDX:       ; %bb.0: ; %entry
218; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
219; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
220; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
221; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
222; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
223; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
224; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
225; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
226; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
227; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
228; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
229; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
230; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
231; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
232; GPRIDX-NEXT:    ; return to shader part epilog
233;
234; MOVREL-LABEL: dyn_extract_v8f32_v_s:
235; MOVREL:       ; %bb.0: ; %entry
236; MOVREL-NEXT:    s_mov_b32 m0, s2
237; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
238; MOVREL-NEXT:    ; return to shader part epilog
239;
240; GFX10-LABEL: dyn_extract_v8f32_v_s:
241; GFX10:       ; %bb.0: ; %entry
242; GFX10-NEXT:    s_mov_b32 m0, s2
243; GFX10-NEXT:    v_movrels_b32_e32 v0, v0
244; GFX10-NEXT:    ; return to shader part epilog
245entry:
246  %ext = extractelement <8 x float> %vec, i32 %sel
247  ret float %ext
248}
249
250define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
251; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
252; GPRIDX:       ; %bb.0: ; %entry
253; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
254; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
255; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
256; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
257; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
258; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
259; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
260; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
261; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
262; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
263; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
264; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
265; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
266; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
267; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
268; GPRIDX-NEXT:    ; return to shader part epilog
269;
270; MOVREL-LABEL: dyn_extract_v8f32_s_s:
271; MOVREL:       ; %bb.0: ; %entry
272; MOVREL-NEXT:    s_mov_b32 s0, s2
273; MOVREL-NEXT:    s_mov_b32 m0, s10
274; MOVREL-NEXT:    s_mov_b32 s1, s3
275; MOVREL-NEXT:    s_mov_b32 s2, s4
276; MOVREL-NEXT:    s_mov_b32 s3, s5
277; MOVREL-NEXT:    s_mov_b32 s4, s6
278; MOVREL-NEXT:    s_mov_b32 s5, s7
279; MOVREL-NEXT:    s_mov_b32 s6, s8
280; MOVREL-NEXT:    s_mov_b32 s7, s9
281; MOVREL-NEXT:    s_movrels_b32 s0, s0
282; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
283; MOVREL-NEXT:    ; return to shader part epilog
284;
285; GFX10-LABEL: dyn_extract_v8f32_s_s:
286; GFX10:       ; %bb.0: ; %entry
287; GFX10-NEXT:    s_mov_b32 s0, s2
288; GFX10-NEXT:    s_mov_b32 m0, s10
289; GFX10-NEXT:    s_mov_b32 s1, s3
290; GFX10-NEXT:    s_mov_b32 s2, s4
291; GFX10-NEXT:    s_mov_b32 s3, s5
292; GFX10-NEXT:    s_mov_b32 s4, s6
293; GFX10-NEXT:    s_mov_b32 s5, s7
294; GFX10-NEXT:    s_mov_b32 s6, s8
295; GFX10-NEXT:    s_mov_b32 s7, s9
296; GFX10-NEXT:    s_movrels_b32 s0, s0
297; GFX10-NEXT:    v_mov_b32_e32 v0, s0
298; GFX10-NEXT:    ; return to shader part epilog
299entry:
300  %ext = extractelement <8 x float> %vec, i32 %sel
301  ret float %ext
302}
303
304define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
305; GCN-LABEL: dyn_extract_v8i64_const_s_v:
306; GCN:       ; %bb.0: ; %entry
307; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308; GCN-NEXT:    s_mov_b64 s[4:5], 1
309; GCN-NEXT:    s_mov_b64 s[6:7], 2
310; GCN-NEXT:    v_mov_b32_e32 v1, s4
311; GCN-NEXT:    v_mov_b32_e32 v2, s5
312; GCN-NEXT:    v_mov_b32_e32 v3, s6
313; GCN-NEXT:    v_mov_b32_e32 v4, s7
314; GCN-NEXT:    s_mov_b64 s[8:9], 3
315; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
316; GCN-NEXT:    v_mov_b32_e32 v5, s8
317; GCN-NEXT:    v_mov_b32_e32 v6, s9
318; GCN-NEXT:    s_mov_b64 s[10:11], 4
319; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
320; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
321; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
322; GCN-NEXT:    v_mov_b32_e32 v7, s10
323; GCN-NEXT:    v_mov_b32_e32 v8, s11
324; GCN-NEXT:    s_mov_b64 s[12:13], 5
325; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
326; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
327; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
328; GCN-NEXT:    s_mov_b64 s[14:15], 6
329; GCN-NEXT:    v_mov_b32_e32 v9, s12
330; GCN-NEXT:    v_mov_b32_e32 v10, s13
331; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
332; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
333; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
334; GCN-NEXT:    s_mov_b64 s[16:17], 7
335; GCN-NEXT:    v_mov_b32_e32 v11, s14
336; GCN-NEXT:    v_mov_b32_e32 v12, s15
337; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
338; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
339; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
340; GCN-NEXT:    s_mov_b64 s[18:19], 8
341; GCN-NEXT:    v_mov_b32_e32 v13, s16
342; GCN-NEXT:    v_mov_b32_e32 v14, s17
343; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
344; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
345; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
346; GCN-NEXT:    v_mov_b32_e32 v15, s18
347; GCN-NEXT:    v_mov_b32_e32 v16, s19
348; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
349; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
350; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
351; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
352; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
353; GCN-NEXT:    s_setpc_b64 s[30:31]
354;
355; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
356; GFX10:       ; %bb.0: ; %entry
357; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
359; GFX10-NEXT:    s_mov_b64 s[6:7], 2
360; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
361; GFX10-NEXT:    v_mov_b32_e32 v1, s6
362; GFX10-NEXT:    v_mov_b32_e32 v2, s7
363; GFX10-NEXT:    s_mov_b64 s[4:5], 1
364; GFX10-NEXT:    s_mov_b64 s[8:9], 3
365; GFX10-NEXT:    s_mov_b64 s[10:11], 4
366; GFX10-NEXT:    v_cndmask_b32_e32 v1, s4, v1, vcc_lo
367; GFX10-NEXT:    v_cndmask_b32_e32 v2, s5, v2, vcc_lo
368; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
369; GFX10-NEXT:    s_mov_b64 s[12:13], 5
370; GFX10-NEXT:    s_mov_b64 s[14:15], 6
371; GFX10-NEXT:    s_mov_b64 s[16:17], 7
372; GFX10-NEXT:    s_mov_b64 s[18:19], 8
373; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
374; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
375; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
376; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
377; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
378; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
379; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
380; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
381; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
382; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s14, vcc_lo
383; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
384; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
385; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s16, vcc_lo
386; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s17, vcc_lo
387; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
388; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s18, vcc_lo
389; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s19, vcc_lo
390; GFX10-NEXT:    s_setpc_b64 s[30:31]
391entry:
392  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
393  ret i64 %ext
394}
395
396define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
397; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
398; GPRIDX:       ; %bb.0: ; %entry
399; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1
400; GPRIDX-NEXT:    s_mov_b32 m0, s2
401; GPRIDX-NEXT:    s_mov_b64 s[18:19], 8
402; GPRIDX-NEXT:    s_mov_b64 s[16:17], 7
403; GPRIDX-NEXT:    s_mov_b64 s[14:15], 6
404; GPRIDX-NEXT:    s_mov_b64 s[12:13], 5
405; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4
406; GPRIDX-NEXT:    s_mov_b64 s[8:9], 3
407; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2
408; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[4:5]
409; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
410; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
411; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
412; GPRIDX-NEXT:    s_endpgm
413;
414; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
415; MOVREL:       ; %bb.0: ; %entry
416; MOVREL-NEXT:    s_mov_b64 s[4:5], 1
417; MOVREL-NEXT:    s_mov_b32 m0, s2
418; MOVREL-NEXT:    s_mov_b64 s[18:19], 8
419; MOVREL-NEXT:    s_mov_b64 s[16:17], 7
420; MOVREL-NEXT:    s_mov_b64 s[14:15], 6
421; MOVREL-NEXT:    s_mov_b64 s[12:13], 5
422; MOVREL-NEXT:    s_mov_b64 s[10:11], 4
423; MOVREL-NEXT:    s_mov_b64 s[8:9], 3
424; MOVREL-NEXT:    s_mov_b64 s[6:7], 2
425; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[4:5]
426; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
427; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
428; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
429; MOVREL-NEXT:    s_endpgm
430;
431; GFX10-LABEL: dyn_extract_v8i64_const_s_s:
432; GFX10:       ; %bb.0: ; %entry
433; GFX10-NEXT:    s_mov_b64 s[4:5], 1
434; GFX10-NEXT:    s_mov_b32 m0, s2
435; GFX10-NEXT:    s_mov_b64 s[18:19], 8
436; GFX10-NEXT:    s_mov_b64 s[16:17], 7
437; GFX10-NEXT:    s_mov_b64 s[14:15], 6
438; GFX10-NEXT:    s_mov_b64 s[12:13], 5
439; GFX10-NEXT:    s_mov_b64 s[10:11], 4
440; GFX10-NEXT:    s_mov_b64 s[8:9], 3
441; GFX10-NEXT:    s_mov_b64 s[6:7], 2
442; GFX10-NEXT:    s_movrels_b64 s[0:1], s[4:5]
443; GFX10-NEXT:    v_mov_b32_e32 v0, s0
444; GFX10-NEXT:    v_mov_b32_e32 v1, s1
445; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
446; GFX10-NEXT:    s_endpgm
447entry:
448  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
449  store i64 %ext, i64 addrspace(1)* undef
450  ret void
451}
452
453define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
454; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
455; GPRIDX:       ; %bb.0: ; %entry
456; GPRIDX-NEXT:    s_mov_b32 s0, s2
457; GPRIDX-NEXT:    s_mov_b32 s1, s3
458; GPRIDX-NEXT:    s_mov_b32 s2, s4
459; GPRIDX-NEXT:    s_mov_b32 s3, s5
460; GPRIDX-NEXT:    s_mov_b32 s4, s6
461; GPRIDX-NEXT:    s_mov_b32 s5, s7
462; GPRIDX-NEXT:    v_mov_b32_e32 v1, s0
463; GPRIDX-NEXT:    v_mov_b32_e32 v2, s1
464; GPRIDX-NEXT:    v_mov_b32_e32 v3, s2
465; GPRIDX-NEXT:    v_mov_b32_e32 v4, s3
466; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
467; GPRIDX-NEXT:    s_mov_b32 s6, s8
468; GPRIDX-NEXT:    s_mov_b32 s7, s9
469; GPRIDX-NEXT:    v_mov_b32_e32 v5, s4
470; GPRIDX-NEXT:    v_mov_b32_e32 v6, s5
471; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
472; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
473; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
474; GPRIDX-NEXT:    s_mov_b32 s8, s10
475; GPRIDX-NEXT:    s_mov_b32 s9, s11
476; GPRIDX-NEXT:    v_mov_b32_e32 v7, s6
477; GPRIDX-NEXT:    v_mov_b32_e32 v8, s7
478; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
479; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
480; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
481; GPRIDX-NEXT:    s_mov_b32 s10, s12
482; GPRIDX-NEXT:    s_mov_b32 s11, s13
483; GPRIDX-NEXT:    v_mov_b32_e32 v9, s8
484; GPRIDX-NEXT:    v_mov_b32_e32 v10, s9
485; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
486; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
487; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
488; GPRIDX-NEXT:    v_mov_b32_e32 v11, s10
489; GPRIDX-NEXT:    v_mov_b32_e32 v12, s11
490; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
491; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
492; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
493; GPRIDX-NEXT:    v_mov_b32_e32 v13, s14
494; GPRIDX-NEXT:    v_mov_b32_e32 v14, s15
495; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
496; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
497; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
498; GPRIDX-NEXT:    v_mov_b32_e32 v15, s16
499; GPRIDX-NEXT:    v_mov_b32_e32 v16, s17
500; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
501; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
502; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
503; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
504; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
505; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
506; GPRIDX-NEXT:    s_endpgm
507;
508; MOVREL-LABEL: dyn_extract_v8i64_s_v:
509; MOVREL:       ; %bb.0: ; %entry
510; MOVREL-NEXT:    s_mov_b32 s0, s2
511; MOVREL-NEXT:    s_mov_b32 s1, s3
512; MOVREL-NEXT:    s_mov_b32 s2, s4
513; MOVREL-NEXT:    s_mov_b32 s3, s5
514; MOVREL-NEXT:    s_mov_b32 s4, s6
515; MOVREL-NEXT:    s_mov_b32 s5, s7
516; MOVREL-NEXT:    v_mov_b32_e32 v1, s0
517; MOVREL-NEXT:    v_mov_b32_e32 v2, s1
518; MOVREL-NEXT:    v_mov_b32_e32 v3, s2
519; MOVREL-NEXT:    v_mov_b32_e32 v4, s3
520; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
521; MOVREL-NEXT:    s_mov_b32 s6, s8
522; MOVREL-NEXT:    s_mov_b32 s7, s9
523; MOVREL-NEXT:    v_mov_b32_e32 v5, s4
524; MOVREL-NEXT:    v_mov_b32_e32 v6, s5
525; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
526; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
527; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
528; MOVREL-NEXT:    s_mov_b32 s8, s10
529; MOVREL-NEXT:    s_mov_b32 s9, s11
530; MOVREL-NEXT:    v_mov_b32_e32 v7, s6
531; MOVREL-NEXT:    v_mov_b32_e32 v8, s7
532; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
533; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
534; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
535; MOVREL-NEXT:    s_mov_b32 s10, s12
536; MOVREL-NEXT:    s_mov_b32 s11, s13
537; MOVREL-NEXT:    v_mov_b32_e32 v9, s8
538; MOVREL-NEXT:    v_mov_b32_e32 v10, s9
539; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
540; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
541; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
542; MOVREL-NEXT:    v_mov_b32_e32 v11, s10
543; MOVREL-NEXT:    v_mov_b32_e32 v12, s11
544; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
545; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
546; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
547; MOVREL-NEXT:    v_mov_b32_e32 v13, s14
548; MOVREL-NEXT:    v_mov_b32_e32 v14, s15
549; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
550; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
551; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
552; MOVREL-NEXT:    v_mov_b32_e32 v15, s16
553; MOVREL-NEXT:    v_mov_b32_e32 v16, s17
554; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
555; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
556; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
557; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
558; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
559; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
560; MOVREL-NEXT:    s_endpgm
561;
562; GFX10-LABEL: dyn_extract_v8i64_s_v:
563; GFX10:       ; %bb.0: ; %entry
564; GFX10-NEXT:    s_mov_b32 s0, s2
565; GFX10-NEXT:    s_mov_b32 s2, s4
566; GFX10-NEXT:    s_mov_b32 s19, s5
567; GFX10-NEXT:    v_mov_b32_e32 v1, s2
568; GFX10-NEXT:    v_mov_b32_e32 v2, s19
569; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
570; GFX10-NEXT:    s_mov_b32 s1, s3
571; GFX10-NEXT:    s_mov_b32 s4, s6
572; GFX10-NEXT:    s_mov_b32 s5, s7
573; GFX10-NEXT:    s_mov_b32 s6, s8
574; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
575; GFX10-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
576; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
577; GFX10-NEXT:    s_mov_b32 s7, s9
578; GFX10-NEXT:    s_mov_b32 s8, s10
579; GFX10-NEXT:    s_mov_b32 s9, s11
580; GFX10-NEXT:    s_mov_b32 s10, s12
581; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
582; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
583; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
584; GFX10-NEXT:    s_mov_b32 s11, s13
585; GFX10-NEXT:    s_mov_b32 s12, s14
586; GFX10-NEXT:    s_mov_b32 s13, s15
587; GFX10-NEXT:    s_mov_b32 s14, s16
588; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
589; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
590; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
591; GFX10-NEXT:    s_mov_b32 s15, s17
592; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
593; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
594; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
595; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
596; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
597; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
598; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
599; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
600; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
601; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s14, vcc_lo
602; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s15, vcc_lo
603; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
604; GFX10-NEXT:    s_endpgm
605entry:
606  %ext = extractelement <8 x i64> %vec, i32 %sel
607  store i64 %ext, i64 addrspace(1)* undef
608  ret void
609}
610
611define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
612; GCN-LABEL: dyn_extract_v8i64_v_v:
613; GCN:       ; %bb.0: ; %entry
614; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
615; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
616; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
617; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
618; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
619; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
620; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
621; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
622; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
623; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
624; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
625; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
626; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
627; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
628; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
629; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
630; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
631; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
632; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
633; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
634; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
635; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
636; GCN-NEXT:    s_setpc_b64 s[30:31]
637;
638; GFX10-LABEL: dyn_extract_v8i64_v_v:
639; GFX10:       ; %bb.0: ; %entry
640; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
641; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
642; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
643; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
644; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
645; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
646; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
647; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
648; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
649; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
650; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
651; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
652; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
653; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
654; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
655; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
656; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
657; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
658; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
659; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
660; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
661; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
662; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
663; GFX10-NEXT:    s_setpc_b64 s[30:31]
664entry:
665  %ext = extractelement <8 x i64> %vec, i32 %sel
666  ret i64 %ext
667}
668
669define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
670; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
671; GPRIDX:       ; %bb.0: ; %entry
672; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
673; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
674; GPRIDX-NEXT:    v_mov_b32_e32 v16, v0
675; GPRIDX-NEXT:    v_mov_b32_e32 v17, v1
676; GPRIDX-NEXT:    s_set_gpr_idx_off
677; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
678; GPRIDX-NEXT:    s_endpgm
679;
680; MOVREL-LABEL: dyn_extract_v8i64_v_s:
681; MOVREL:       ; %bb.0: ; %entry
682; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
683; MOVREL-NEXT:    v_movrels_b32_e32 v16, v0
684; MOVREL-NEXT:    v_movrels_b32_e32 v17, v1
685; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[16:17]
686; MOVREL-NEXT:    s_endpgm
687;
688; GFX10-LABEL: dyn_extract_v8i64_v_s:
689; GFX10:       ; %bb.0: ; %entry
690; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
691; GFX10-NEXT:    v_movrels_b32_e32 v16, v0
692; GFX10-NEXT:    v_movrels_b32_e32 v17, v1
693; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
694; GFX10-NEXT:    s_endpgm
695entry:
696  %ext = extractelement <8 x i64> %vec, i32 %sel
697  store i64 %ext, i64 addrspace(1)* undef
698  ret void
699}
700
701define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
702; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
703; GPRIDX:       ; %bb.0: ; %entry
704; GPRIDX-NEXT:    s_mov_b32 s0, s2
705; GPRIDX-NEXT:    s_mov_b32 s1, s3
706; GPRIDX-NEXT:    s_mov_b32 m0, s18
707; GPRIDX-NEXT:    s_mov_b32 s2, s4
708; GPRIDX-NEXT:    s_mov_b32 s3, s5
709; GPRIDX-NEXT:    s_mov_b32 s4, s6
710; GPRIDX-NEXT:    s_mov_b32 s5, s7
711; GPRIDX-NEXT:    s_mov_b32 s6, s8
712; GPRIDX-NEXT:    s_mov_b32 s7, s9
713; GPRIDX-NEXT:    s_mov_b32 s8, s10
714; GPRIDX-NEXT:    s_mov_b32 s9, s11
715; GPRIDX-NEXT:    s_mov_b32 s10, s12
716; GPRIDX-NEXT:    s_mov_b32 s11, s13
717; GPRIDX-NEXT:    s_mov_b32 s12, s14
718; GPRIDX-NEXT:    s_mov_b32 s13, s15
719; GPRIDX-NEXT:    s_mov_b32 s14, s16
720; GPRIDX-NEXT:    s_mov_b32 s15, s17
721; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
722; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
723; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
724; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
725; GPRIDX-NEXT:    s_endpgm
726;
727; MOVREL-LABEL: dyn_extract_v8i64_s_s:
728; MOVREL:       ; %bb.0: ; %entry
729; MOVREL-NEXT:    s_mov_b32 s0, s2
730; MOVREL-NEXT:    s_mov_b32 s1, s3
731; MOVREL-NEXT:    s_mov_b32 m0, s18
732; MOVREL-NEXT:    s_mov_b32 s2, s4
733; MOVREL-NEXT:    s_mov_b32 s3, s5
734; MOVREL-NEXT:    s_mov_b32 s4, s6
735; MOVREL-NEXT:    s_mov_b32 s5, s7
736; MOVREL-NEXT:    s_mov_b32 s6, s8
737; MOVREL-NEXT:    s_mov_b32 s7, s9
738; MOVREL-NEXT:    s_mov_b32 s8, s10
739; MOVREL-NEXT:    s_mov_b32 s9, s11
740; MOVREL-NEXT:    s_mov_b32 s10, s12
741; MOVREL-NEXT:    s_mov_b32 s11, s13
742; MOVREL-NEXT:    s_mov_b32 s12, s14
743; MOVREL-NEXT:    s_mov_b32 s13, s15
744; MOVREL-NEXT:    s_mov_b32 s14, s16
745; MOVREL-NEXT:    s_mov_b32 s15, s17
746; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
747; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
748; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
749; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
750; MOVREL-NEXT:    s_endpgm
751;
752; GFX10-LABEL: dyn_extract_v8i64_s_s:
753; GFX10:       ; %bb.0: ; %entry
754; GFX10-NEXT:    s_mov_b32 s0, s2
755; GFX10-NEXT:    s_mov_b32 s1, s3
756; GFX10-NEXT:    s_mov_b32 m0, s18
757; GFX10-NEXT:    s_mov_b32 s2, s4
758; GFX10-NEXT:    s_mov_b32 s3, s5
759; GFX10-NEXT:    s_mov_b32 s4, s6
760; GFX10-NEXT:    s_mov_b32 s5, s7
761; GFX10-NEXT:    s_mov_b32 s6, s8
762; GFX10-NEXT:    s_mov_b32 s7, s9
763; GFX10-NEXT:    s_mov_b32 s8, s10
764; GFX10-NEXT:    s_mov_b32 s9, s11
765; GFX10-NEXT:    s_mov_b32 s10, s12
766; GFX10-NEXT:    s_mov_b32 s11, s13
767; GFX10-NEXT:    s_mov_b32 s12, s14
768; GFX10-NEXT:    s_mov_b32 s13, s15
769; GFX10-NEXT:    s_mov_b32 s14, s16
770; GFX10-NEXT:    s_mov_b32 s15, s17
771; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
772; GFX10-NEXT:    v_mov_b32_e32 v0, s0
773; GFX10-NEXT:    v_mov_b32_e32 v1, s1
774; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
775; GFX10-NEXT:    s_endpgm
776entry:
777  %ext = extractelement <8 x i64> %vec, i32 %sel
778  store i64 %ext, i64 addrspace(1)* undef
779  ret void
780}
781
782define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
783; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
784; GPRIDX:       ; %bb.0: ; %entry
785; GPRIDX-NEXT:    s_add_i32 s10, s10, 3
786; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
787; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
788; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
789; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
790; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
791; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
792; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
793; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
794; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
795; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
796; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
797; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
798; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
799; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
800; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
801; GPRIDX-NEXT:    ; return to shader part epilog
802;
803; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
804; MOVREL:       ; %bb.0: ; %entry
805; MOVREL-NEXT:    s_mov_b32 s0, s2
806; MOVREL-NEXT:    s_mov_b32 s1, s3
807; MOVREL-NEXT:    s_mov_b32 s3, s5
808; MOVREL-NEXT:    s_mov_b32 m0, s10
809; MOVREL-NEXT:    s_mov_b32 s2, s4
810; MOVREL-NEXT:    s_mov_b32 s4, s6
811; MOVREL-NEXT:    s_mov_b32 s5, s7
812; MOVREL-NEXT:    s_mov_b32 s6, s8
813; MOVREL-NEXT:    s_mov_b32 s7, s9
814; MOVREL-NEXT:    s_movrels_b32 s0, s3
815; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
816; MOVREL-NEXT:    ; return to shader part epilog
817;
818; GFX10-LABEL: dyn_extract_v8f32_s_s_offset3:
819; GFX10:       ; %bb.0: ; %entry
820; GFX10-NEXT:    s_mov_b32 s1, s3
821; GFX10-NEXT:    s_mov_b32 s3, s5
822; GFX10-NEXT:    s_mov_b32 m0, s10
823; GFX10-NEXT:    s_mov_b32 s0, s2
824; GFX10-NEXT:    s_mov_b32 s2, s4
825; GFX10-NEXT:    s_mov_b32 s4, s6
826; GFX10-NEXT:    s_mov_b32 s5, s7
827; GFX10-NEXT:    s_mov_b32 s6, s8
828; GFX10-NEXT:    s_mov_b32 s7, s9
829; GFX10-NEXT:    s_movrels_b32 s0, s3
830; GFX10-NEXT:    v_mov_b32_e32 v0, s0
831; GFX10-NEXT:    ; return to shader part epilog
832entry:
833  %add = add i32 %sel, 3
834  %ext = extractelement <8 x float> %vec, i32 %add
835  ret float %ext
836}
837
838define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
839; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
840; GPRIDX:       ; %bb.0: ; %entry
841; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842; GPRIDX-NEXT:    v_add_u32_e32 v8, 3, v8
843; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
844; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
845; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
846; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
847; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
848; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
849; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
850; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
851; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
852; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
853; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
854; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
855; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
856; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
857; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
858;
859; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
860; MOVREL:       ; %bb.0: ; %entry
861; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
862; MOVREL-NEXT:    v_add_u32_e32 v8, vcc, 3, v8
863; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
864; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
865; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
866; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
867; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
868; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
869; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
870; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
871; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
872; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
873; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
874; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
875; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
876; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
877; MOVREL-NEXT:    s_setpc_b64 s[30:31]
878;
879; GFX10-LABEL: dyn_extract_v8f32_v_v_offset3:
880; GFX10:       ; %bb.0: ; %entry
881; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
882; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
883; GFX10-NEXT:    v_add_nc_u32_e32 v8, 3, v8
884; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
885; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
886; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
887; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
888; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
889; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
890; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
891; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
892; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
893; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
894; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
895; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
896; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
897; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
898; GFX10-NEXT:    s_setpc_b64 s[30:31]
899entry:
900  %add = add i32 %sel, 3
901  %ext = extractelement <8 x float> %vec, i32 %add
902  ret float %ext
903}
904
905define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) {
906; GCN-LABEL: dyn_extract_v8f64_s_s_offset1:
907; GCN:       ; %bb.0: ; %entry
908; GCN-NEXT:    s_mov_b32 s0, s2
909; GCN-NEXT:    s_mov_b32 s1, s3
910; GCN-NEXT:    s_mov_b32 s2, s4
911; GCN-NEXT:    s_mov_b32 s3, s5
912; GCN-NEXT:    s_mov_b32 m0, s18
913; GCN-NEXT:    s_mov_b32 s4, s6
914; GCN-NEXT:    s_mov_b32 s5, s7
915; GCN-NEXT:    s_mov_b32 s6, s8
916; GCN-NEXT:    s_mov_b32 s7, s9
917; GCN-NEXT:    s_mov_b32 s8, s10
918; GCN-NEXT:    s_mov_b32 s9, s11
919; GCN-NEXT:    s_mov_b32 s10, s12
920; GCN-NEXT:    s_mov_b32 s11, s13
921; GCN-NEXT:    s_mov_b32 s12, s14
922; GCN-NEXT:    s_mov_b32 s13, s15
923; GCN-NEXT:    s_mov_b32 s14, s16
924; GCN-NEXT:    s_mov_b32 s15, s17
925; GCN-NEXT:    s_movrels_b64 s[0:1], s[2:3]
926; GCN-NEXT:    ; return to shader part epilog
927;
928; GFX10-LABEL: dyn_extract_v8f64_s_s_offset1:
929; GFX10:       ; %bb.0: ; %entry
930; GFX10-NEXT:    s_mov_b32 s0, s2
931; GFX10-NEXT:    s_mov_b32 s1, s3
932; GFX10-NEXT:    s_mov_b32 s2, s4
933; GFX10-NEXT:    s_mov_b32 s3, s5
934; GFX10-NEXT:    s_mov_b32 m0, s18
935; GFX10-NEXT:    s_mov_b32 s4, s6
936; GFX10-NEXT:    s_mov_b32 s5, s7
937; GFX10-NEXT:    s_mov_b32 s6, s8
938; GFX10-NEXT:    s_mov_b32 s7, s9
939; GFX10-NEXT:    s_mov_b32 s8, s10
940; GFX10-NEXT:    s_mov_b32 s9, s11
941; GFX10-NEXT:    s_mov_b32 s10, s12
942; GFX10-NEXT:    s_mov_b32 s11, s13
943; GFX10-NEXT:    s_mov_b32 s12, s14
944; GFX10-NEXT:    s_mov_b32 s13, s15
945; GFX10-NEXT:    s_mov_b32 s14, s16
946; GFX10-NEXT:    s_mov_b32 s15, s17
947; GFX10-NEXT:    s_movrels_b64 s[0:1], s[2:3]
948; GFX10-NEXT:    ; return to shader part epilog
949entry:
950  %add = add i32 %sel, 1
951  %ext = extractelement <8 x double> %vec, i32 %add
952  ret double %ext
953}
954
955define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) {
956; GCN-LABEL: dyn_extract_v8f64_s_s_offset2:
957; GCN:       ; %bb.0: ; %entry
958; GCN-NEXT:    s_mov_b32 s0, s2
959; GCN-NEXT:    s_mov_b32 s1, s3
960; GCN-NEXT:    s_mov_b32 s2, s4
961; GCN-NEXT:    s_mov_b32 s3, s5
962; GCN-NEXT:    s_mov_b32 s4, s6
963; GCN-NEXT:    s_mov_b32 s5, s7
964; GCN-NEXT:    s_mov_b32 m0, s18
965; GCN-NEXT:    s_mov_b32 s6, s8
966; GCN-NEXT:    s_mov_b32 s7, s9
967; GCN-NEXT:    s_mov_b32 s8, s10
968; GCN-NEXT:    s_mov_b32 s9, s11
969; GCN-NEXT:    s_mov_b32 s10, s12
970; GCN-NEXT:    s_mov_b32 s11, s13
971; GCN-NEXT:    s_mov_b32 s12, s14
972; GCN-NEXT:    s_mov_b32 s13, s15
973; GCN-NEXT:    s_mov_b32 s14, s16
974; GCN-NEXT:    s_mov_b32 s15, s17
975; GCN-NEXT:    s_movrels_b64 s[0:1], s[4:5]
976; GCN-NEXT:    ; return to shader part epilog
977;
978; GFX10-LABEL: dyn_extract_v8f64_s_s_offset2:
979; GFX10:       ; %bb.0: ; %entry
980; GFX10-NEXT:    s_mov_b32 s0, s2
981; GFX10-NEXT:    s_mov_b32 s1, s3
982; GFX10-NEXT:    s_mov_b32 s2, s4
983; GFX10-NEXT:    s_mov_b32 s3, s5
984; GFX10-NEXT:    s_mov_b32 s4, s6
985; GFX10-NEXT:    s_mov_b32 s5, s7
986; GFX10-NEXT:    s_mov_b32 m0, s18
987; GFX10-NEXT:    s_mov_b32 s6, s8
988; GFX10-NEXT:    s_mov_b32 s7, s9
989; GFX10-NEXT:    s_mov_b32 s8, s10
990; GFX10-NEXT:    s_mov_b32 s9, s11
991; GFX10-NEXT:    s_mov_b32 s10, s12
992; GFX10-NEXT:    s_mov_b32 s11, s13
993; GFX10-NEXT:    s_mov_b32 s12, s14
994; GFX10-NEXT:    s_mov_b32 s13, s15
995; GFX10-NEXT:    s_mov_b32 s14, s16
996; GFX10-NEXT:    s_mov_b32 s15, s17
997; GFX10-NEXT:    s_movrels_b64 s[0:1], s[4:5]
998; GFX10-NEXT:    ; return to shader part epilog
999entry:
1000  %add = add i32 %sel, 2
1001  %ext = extractelement <8 x double> %vec, i32 %add
1002  ret double %ext
1003}
1004
1005define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) {
1006; GCN-LABEL: dyn_extract_v8f64_s_s_offset3:
1007; GCN:       ; %bb.0: ; %entry
1008; GCN-NEXT:    s_mov_b32 s0, s2
1009; GCN-NEXT:    s_mov_b32 s1, s3
1010; GCN-NEXT:    s_mov_b32 s2, s4
1011; GCN-NEXT:    s_mov_b32 s3, s5
1012; GCN-NEXT:    s_mov_b32 s4, s6
1013; GCN-NEXT:    s_mov_b32 s5, s7
1014; GCN-NEXT:    s_mov_b32 s6, s8
1015; GCN-NEXT:    s_mov_b32 s7, s9
1016; GCN-NEXT:    s_mov_b32 m0, s18
1017; GCN-NEXT:    s_mov_b32 s8, s10
1018; GCN-NEXT:    s_mov_b32 s9, s11
1019; GCN-NEXT:    s_mov_b32 s10, s12
1020; GCN-NEXT:    s_mov_b32 s11, s13
1021; GCN-NEXT:    s_mov_b32 s12, s14
1022; GCN-NEXT:    s_mov_b32 s13, s15
1023; GCN-NEXT:    s_mov_b32 s14, s16
1024; GCN-NEXT:    s_mov_b32 s15, s17
1025; GCN-NEXT:    s_movrels_b64 s[0:1], s[6:7]
1026; GCN-NEXT:    ; return to shader part epilog
1027;
1028; GFX10-LABEL: dyn_extract_v8f64_s_s_offset3:
1029; GFX10:       ; %bb.0: ; %entry
1030; GFX10-NEXT:    s_mov_b32 s0, s2
1031; GFX10-NEXT:    s_mov_b32 s1, s3
1032; GFX10-NEXT:    s_mov_b32 s2, s4
1033; GFX10-NEXT:    s_mov_b32 s3, s5
1034; GFX10-NEXT:    s_mov_b32 s4, s6
1035; GFX10-NEXT:    s_mov_b32 s5, s7
1036; GFX10-NEXT:    s_mov_b32 s6, s8
1037; GFX10-NEXT:    s_mov_b32 s7, s9
1038; GFX10-NEXT:    s_mov_b32 m0, s18
1039; GFX10-NEXT:    s_mov_b32 s8, s10
1040; GFX10-NEXT:    s_mov_b32 s9, s11
1041; GFX10-NEXT:    s_mov_b32 s10, s12
1042; GFX10-NEXT:    s_mov_b32 s11, s13
1043; GFX10-NEXT:    s_mov_b32 s12, s14
1044; GFX10-NEXT:    s_mov_b32 s13, s15
1045; GFX10-NEXT:    s_mov_b32 s14, s16
1046; GFX10-NEXT:    s_mov_b32 s15, s17
1047; GFX10-NEXT:    s_movrels_b64 s[0:1], s[6:7]
1048; GFX10-NEXT:    ; return to shader part epilog
1049entry:
1050  %add = add i32 %sel, 3
1051  %ext = extractelement <8 x double> %vec, i32 %add
1052  ret double %ext
1053}
1054
1055define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) {
1056; GCN-LABEL: dyn_extract_v8f64_s_s_offset4:
1057; GCN:       ; %bb.0: ; %entry
1058; GCN-NEXT:    s_mov_b32 s0, s2
1059; GCN-NEXT:    s_mov_b32 s1, s3
1060; GCN-NEXT:    s_mov_b32 s2, s4
1061; GCN-NEXT:    s_mov_b32 s3, s5
1062; GCN-NEXT:    s_mov_b32 s4, s6
1063; GCN-NEXT:    s_mov_b32 s5, s7
1064; GCN-NEXT:    s_mov_b32 s6, s8
1065; GCN-NEXT:    s_mov_b32 s7, s9
1066; GCN-NEXT:    s_mov_b32 s8, s10
1067; GCN-NEXT:    s_mov_b32 s9, s11
1068; GCN-NEXT:    s_mov_b32 m0, s18
1069; GCN-NEXT:    s_mov_b32 s10, s12
1070; GCN-NEXT:    s_mov_b32 s11, s13
1071; GCN-NEXT:    s_mov_b32 s12, s14
1072; GCN-NEXT:    s_mov_b32 s13, s15
1073; GCN-NEXT:    s_mov_b32 s14, s16
1074; GCN-NEXT:    s_mov_b32 s15, s17
1075; GCN-NEXT:    s_movrels_b64 s[0:1], s[8:9]
1076; GCN-NEXT:    ; return to shader part epilog
1077;
1078; GFX10-LABEL: dyn_extract_v8f64_s_s_offset4:
1079; GFX10:       ; %bb.0: ; %entry
1080; GFX10-NEXT:    s_mov_b32 s0, s2
1081; GFX10-NEXT:    s_mov_b32 s1, s3
1082; GFX10-NEXT:    s_mov_b32 s2, s4
1083; GFX10-NEXT:    s_mov_b32 s3, s5
1084; GFX10-NEXT:    s_mov_b32 s4, s6
1085; GFX10-NEXT:    s_mov_b32 s5, s7
1086; GFX10-NEXT:    s_mov_b32 s6, s8
1087; GFX10-NEXT:    s_mov_b32 s7, s9
1088; GFX10-NEXT:    s_mov_b32 s8, s10
1089; GFX10-NEXT:    s_mov_b32 s9, s11
1090; GFX10-NEXT:    s_mov_b32 m0, s18
1091; GFX10-NEXT:    s_mov_b32 s10, s12
1092; GFX10-NEXT:    s_mov_b32 s11, s13
1093; GFX10-NEXT:    s_mov_b32 s12, s14
1094; GFX10-NEXT:    s_mov_b32 s13, s15
1095; GFX10-NEXT:    s_mov_b32 s14, s16
1096; GFX10-NEXT:    s_mov_b32 s15, s17
1097; GFX10-NEXT:    s_movrels_b64 s[0:1], s[8:9]
1098; GFX10-NEXT:    ; return to shader part epilog
1099entry:
1100  %add = add i32 %sel, 4
1101  %ext = extractelement <8 x double> %vec, i32 %add
1102  ret double %ext
1103}
1104
1105define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) {
1106; GCN-LABEL: dyn_extract_v8f64_s_s_offset5:
1107; GCN:       ; %bb.0: ; %entry
1108; GCN-NEXT:    s_mov_b32 s0, s2
1109; GCN-NEXT:    s_mov_b32 s1, s3
1110; GCN-NEXT:    s_mov_b32 s2, s4
1111; GCN-NEXT:    s_mov_b32 s3, s5
1112; GCN-NEXT:    s_mov_b32 s4, s6
1113; GCN-NEXT:    s_mov_b32 s5, s7
1114; GCN-NEXT:    s_mov_b32 s6, s8
1115; GCN-NEXT:    s_mov_b32 s7, s9
1116; GCN-NEXT:    s_mov_b32 s8, s10
1117; GCN-NEXT:    s_mov_b32 s9, s11
1118; GCN-NEXT:    s_mov_b32 s10, s12
1119; GCN-NEXT:    s_mov_b32 s11, s13
1120; GCN-NEXT:    s_mov_b32 m0, s18
1121; GCN-NEXT:    s_mov_b32 s12, s14
1122; GCN-NEXT:    s_mov_b32 s13, s15
1123; GCN-NEXT:    s_mov_b32 s14, s16
1124; GCN-NEXT:    s_mov_b32 s15, s17
1125; GCN-NEXT:    s_movrels_b64 s[0:1], s[10:11]
1126; GCN-NEXT:    ; return to shader part epilog
1127;
1128; GFX10-LABEL: dyn_extract_v8f64_s_s_offset5:
1129; GFX10:       ; %bb.0: ; %entry
1130; GFX10-NEXT:    s_mov_b32 s0, s2
1131; GFX10-NEXT:    s_mov_b32 s1, s3
1132; GFX10-NEXT:    s_mov_b32 s2, s4
1133; GFX10-NEXT:    s_mov_b32 s3, s5
1134; GFX10-NEXT:    s_mov_b32 s4, s6
1135; GFX10-NEXT:    s_mov_b32 s5, s7
1136; GFX10-NEXT:    s_mov_b32 s6, s8
1137; GFX10-NEXT:    s_mov_b32 s7, s9
1138; GFX10-NEXT:    s_mov_b32 s8, s10
1139; GFX10-NEXT:    s_mov_b32 s9, s11
1140; GFX10-NEXT:    s_mov_b32 s10, s12
1141; GFX10-NEXT:    s_mov_b32 s11, s13
1142; GFX10-NEXT:    s_mov_b32 m0, s18
1143; GFX10-NEXT:    s_mov_b32 s12, s14
1144; GFX10-NEXT:    s_mov_b32 s13, s15
1145; GFX10-NEXT:    s_mov_b32 s14, s16
1146; GFX10-NEXT:    s_mov_b32 s15, s17
1147; GFX10-NEXT:    s_movrels_b64 s[0:1], s[10:11]
1148; GFX10-NEXT:    ; return to shader part epilog
1149entry:
1150  %add = add i32 %sel, 5
1151  %ext = extractelement <8 x double> %vec, i32 %add
1152  ret double %ext
1153}
1154
1155define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) {
1156; GCN-LABEL: dyn_extract_v8f64_s_s_offset6:
1157; GCN:       ; %bb.0: ; %entry
1158; GCN-NEXT:    s_mov_b32 s0, s2
1159; GCN-NEXT:    s_mov_b32 s1, s3
1160; GCN-NEXT:    s_mov_b32 s2, s4
1161; GCN-NEXT:    s_mov_b32 s3, s5
1162; GCN-NEXT:    s_mov_b32 s4, s6
1163; GCN-NEXT:    s_mov_b32 s5, s7
1164; GCN-NEXT:    s_mov_b32 s6, s8
1165; GCN-NEXT:    s_mov_b32 s7, s9
1166; GCN-NEXT:    s_mov_b32 s8, s10
1167; GCN-NEXT:    s_mov_b32 s9, s11
1168; GCN-NEXT:    s_mov_b32 s10, s12
1169; GCN-NEXT:    s_mov_b32 s11, s13
1170; GCN-NEXT:    s_mov_b32 s12, s14
1171; GCN-NEXT:    s_mov_b32 s13, s15
1172; GCN-NEXT:    s_mov_b32 m0, s18
1173; GCN-NEXT:    s_mov_b32 s14, s16
1174; GCN-NEXT:    s_mov_b32 s15, s17
1175; GCN-NEXT:    s_movrels_b64 s[0:1], s[12:13]
1176; GCN-NEXT:    ; return to shader part epilog
1177;
1178; GFX10-LABEL: dyn_extract_v8f64_s_s_offset6:
1179; GFX10:       ; %bb.0: ; %entry
1180; GFX10-NEXT:    s_mov_b32 s0, s2
1181; GFX10-NEXT:    s_mov_b32 s1, s3
1182; GFX10-NEXT:    s_mov_b32 s2, s4
1183; GFX10-NEXT:    s_mov_b32 s3, s5
1184; GFX10-NEXT:    s_mov_b32 s4, s6
1185; GFX10-NEXT:    s_mov_b32 s5, s7
1186; GFX10-NEXT:    s_mov_b32 s6, s8
1187; GFX10-NEXT:    s_mov_b32 s7, s9
1188; GFX10-NEXT:    s_mov_b32 s8, s10
1189; GFX10-NEXT:    s_mov_b32 s9, s11
1190; GFX10-NEXT:    s_mov_b32 s10, s12
1191; GFX10-NEXT:    s_mov_b32 s11, s13
1192; GFX10-NEXT:    s_mov_b32 s12, s14
1193; GFX10-NEXT:    s_mov_b32 s13, s15
1194; GFX10-NEXT:    s_mov_b32 m0, s18
1195; GFX10-NEXT:    s_mov_b32 s14, s16
1196; GFX10-NEXT:    s_mov_b32 s15, s17
1197; GFX10-NEXT:    s_movrels_b64 s[0:1], s[12:13]
1198; GFX10-NEXT:    ; return to shader part epilog
1199entry:
1200  %add = add i32 %sel, 6
1201  %ext = extractelement <8 x double> %vec, i32 %add
1202  ret double %ext
1203}
1204
1205define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) {
1206; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7:
1207; GPRIDX:       ; %bb.0: ; %entry
1208; GPRIDX-NEXT:    s_mov_b32 s0, s2
1209; GPRIDX-NEXT:    s_mov_b32 s1, s3
1210; GPRIDX-NEXT:    s_mov_b32 s2, s4
1211; GPRIDX-NEXT:    s_mov_b32 s3, s5
1212; GPRIDX-NEXT:    s_mov_b32 s4, s6
1213; GPRIDX-NEXT:    s_mov_b32 s5, s7
1214; GPRIDX-NEXT:    s_mov_b32 s6, s8
1215; GPRIDX-NEXT:    s_mov_b32 s7, s9
1216; GPRIDX-NEXT:    s_mov_b32 s8, s10
1217; GPRIDX-NEXT:    s_mov_b32 s9, s11
1218; GPRIDX-NEXT:    s_mov_b32 s10, s12
1219; GPRIDX-NEXT:    s_mov_b32 s11, s13
1220; GPRIDX-NEXT:    s_mov_b32 s12, s14
1221; GPRIDX-NEXT:    s_mov_b32 s13, s15
1222; GPRIDX-NEXT:    s_mov_b32 s14, s16
1223; GPRIDX-NEXT:    s_mov_b32 s15, s17
1224; GPRIDX-NEXT:    s_mov_b32 m0, s18
1225; GPRIDX-NEXT:    s_nop 0
1226; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1227; GPRIDX-NEXT:    ; return to shader part epilog
1228;
1229; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
1230; MOVREL:       ; %bb.0: ; %entry
1231; MOVREL-NEXT:    s_mov_b32 s0, s2
1232; MOVREL-NEXT:    s_mov_b32 s1, s3
1233; MOVREL-NEXT:    s_mov_b32 s2, s4
1234; MOVREL-NEXT:    s_mov_b32 s3, s5
1235; MOVREL-NEXT:    s_mov_b32 s4, s6
1236; MOVREL-NEXT:    s_mov_b32 s5, s7
1237; MOVREL-NEXT:    s_mov_b32 s6, s8
1238; MOVREL-NEXT:    s_mov_b32 s7, s9
1239; MOVREL-NEXT:    s_mov_b32 s8, s10
1240; MOVREL-NEXT:    s_mov_b32 s9, s11
1241; MOVREL-NEXT:    s_mov_b32 s10, s12
1242; MOVREL-NEXT:    s_mov_b32 s11, s13
1243; MOVREL-NEXT:    s_mov_b32 s12, s14
1244; MOVREL-NEXT:    s_mov_b32 s13, s15
1245; MOVREL-NEXT:    s_mov_b32 s14, s16
1246; MOVREL-NEXT:    s_mov_b32 s15, s17
1247; MOVREL-NEXT:    s_mov_b32 m0, s18
1248; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1249; MOVREL-NEXT:    ; return to shader part epilog
1250;
1251; GFX10-LABEL: dyn_extract_v8f64_s_s_offset7:
1252; GFX10:       ; %bb.0: ; %entry
1253; GFX10-NEXT:    s_mov_b32 s0, s2
1254; GFX10-NEXT:    s_mov_b32 s1, s3
1255; GFX10-NEXT:    s_mov_b32 s2, s4
1256; GFX10-NEXT:    s_mov_b32 s3, s5
1257; GFX10-NEXT:    s_mov_b32 s4, s6
1258; GFX10-NEXT:    s_mov_b32 s5, s7
1259; GFX10-NEXT:    s_mov_b32 s6, s8
1260; GFX10-NEXT:    s_mov_b32 s7, s9
1261; GFX10-NEXT:    s_mov_b32 s8, s10
1262; GFX10-NEXT:    s_mov_b32 s9, s11
1263; GFX10-NEXT:    s_mov_b32 s10, s12
1264; GFX10-NEXT:    s_mov_b32 s11, s13
1265; GFX10-NEXT:    s_mov_b32 s12, s14
1266; GFX10-NEXT:    s_mov_b32 s13, s15
1267; GFX10-NEXT:    s_mov_b32 s14, s16
1268; GFX10-NEXT:    s_mov_b32 s15, s17
1269; GFX10-NEXT:    s_mov_b32 m0, s18
1270; GFX10-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1271; GFX10-NEXT:    ; return to shader part epilog
1272entry:
1273  %add = add i32 %sel, 7
1274  %ext = extractelement <8 x double> %vec, i32 %add
1275  ret double %ext
1276}
1277
1278define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) {
1279; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1280; GCN:       ; %bb.0: ; %entry
1281; GCN-NEXT:    s_mov_b32 s0, s2
1282; GCN-NEXT:    s_mov_b32 s1, s3
1283; GCN-NEXT:    s_add_i32 m0, s18, -1
1284; GCN-NEXT:    s_mov_b32 s2, s4
1285; GCN-NEXT:    s_mov_b32 s3, s5
1286; GCN-NEXT:    s_mov_b32 s4, s6
1287; GCN-NEXT:    s_mov_b32 s5, s7
1288; GCN-NEXT:    s_mov_b32 s6, s8
1289; GCN-NEXT:    s_mov_b32 s7, s9
1290; GCN-NEXT:    s_mov_b32 s8, s10
1291; GCN-NEXT:    s_mov_b32 s9, s11
1292; GCN-NEXT:    s_mov_b32 s10, s12
1293; GCN-NEXT:    s_mov_b32 s11, s13
1294; GCN-NEXT:    s_mov_b32 s12, s14
1295; GCN-NEXT:    s_mov_b32 s13, s15
1296; GCN-NEXT:    s_mov_b32 s14, s16
1297; GCN-NEXT:    s_mov_b32 s15, s17
1298; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1299; GCN-NEXT:    ; return to shader part epilog
1300;
1301; GFX10-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1302; GFX10:       ; %bb.0: ; %entry
1303; GFX10-NEXT:    s_mov_b32 s0, s2
1304; GFX10-NEXT:    s_mov_b32 s1, s3
1305; GFX10-NEXT:    s_add_i32 m0, s18, -1
1306; GFX10-NEXT:    s_mov_b32 s2, s4
1307; GFX10-NEXT:    s_mov_b32 s3, s5
1308; GFX10-NEXT:    s_mov_b32 s4, s6
1309; GFX10-NEXT:    s_mov_b32 s5, s7
1310; GFX10-NEXT:    s_mov_b32 s6, s8
1311; GFX10-NEXT:    s_mov_b32 s7, s9
1312; GFX10-NEXT:    s_mov_b32 s8, s10
1313; GFX10-NEXT:    s_mov_b32 s9, s11
1314; GFX10-NEXT:    s_mov_b32 s10, s12
1315; GFX10-NEXT:    s_mov_b32 s11, s13
1316; GFX10-NEXT:    s_mov_b32 s12, s14
1317; GFX10-NEXT:    s_mov_b32 s13, s15
1318; GFX10-NEXT:    s_mov_b32 s14, s16
1319; GFX10-NEXT:    s_mov_b32 s15, s17
1320; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1321; GFX10-NEXT:    ; return to shader part epilog
1322entry:
1323  %add = add i32 %sel, -1
1324  %ext = extractelement <8 x double> %vec, i32 %add
1325  ret double %ext
1326}
1327
1328define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
1329; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
1330; GPRIDX:       ; %bb.0: ; %entry
1331; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332; GPRIDX-NEXT:    v_add_u32_e32 v16, 3, v16
1333; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1334; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1335; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1336; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1337; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1338; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1339; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1340; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1341; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1342; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1343; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1344; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1345; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1346; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1347; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1348; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1349; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1350; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1351; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1352; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1353; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1354; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
1355;
1356; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
1357; MOVREL:       ; %bb.0: ; %entry
1358; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1359; MOVREL-NEXT:    v_add_u32_e32 v16, vcc, 3, v16
1360; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1361; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1362; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1363; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1364; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1365; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1366; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1367; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1368; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1369; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1370; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1371; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1372; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1373; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1374; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1375; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1376; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1377; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1378; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1379; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1380; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1381; MOVREL-NEXT:    s_setpc_b64 s[30:31]
1382;
1383; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3:
1384; GFX10:       ; %bb.0: ; %entry
1385; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1386; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1387; GFX10-NEXT:    v_add_nc_u32_e32 v16, 3, v16
1388; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1389; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1390; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1391; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1392; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1393; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1394; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1395; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1396; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1397; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1398; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1399; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1400; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1401; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1402; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1403; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1404; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1405; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1406; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1407; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1408; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1409; GFX10-NEXT:    s_setpc_b64 s[30:31]
1410entry:
1411  %add = add i32 %sel, 3
1412  %ext = extractelement <8 x double> %vec, i32 %add
1413  ret double %ext
1414}
1415
1416define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) {
1417; GCN-LABEL: dyn_extract_v8p3_v_v:
1418; GCN:       ; %bb.0: ; %entry
1419; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1420; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
1421; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1422; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
1423; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1424; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
1425; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1426; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
1427; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1428; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
1429; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
1430; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
1431; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1432; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
1433; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
1434; GCN-NEXT:    s_setpc_b64 s[30:31]
1435;
1436; GFX10-LABEL: dyn_extract_v8p3_v_v:
1437; GFX10:       ; %bb.0: ; %entry
1438; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1439; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1440; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
1441; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1442; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
1443; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1444; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
1445; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1446; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
1447; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1448; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
1449; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1450; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
1451; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1452; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
1453; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
1454; GFX10-NEXT:    s_setpc_b64 s[30:31]
1455entry:
1456  %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
1457  ret i8 addrspace(3)* %ext
1458}
1459
1460define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) {
1461; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
1462; GPRIDX:       ; %bb.0: ; %entry
1463; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
1464; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
1465; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
1466; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
1467; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
1468; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
1469; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
1470; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
1471; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
1472; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
1473; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
1474; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
1475; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
1476; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
1477; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1478; GPRIDX-NEXT:    ds_write_b32 v0, v0
1479; GPRIDX-NEXT:    s_endpgm
1480;
1481; MOVREL-LABEL: dyn_extract_v8p3_s_s:
1482; MOVREL:       ; %bb.0: ; %entry
1483; MOVREL-NEXT:    s_mov_b32 s0, s2
1484; MOVREL-NEXT:    s_mov_b32 m0, s10
1485; MOVREL-NEXT:    s_mov_b32 s1, s3
1486; MOVREL-NEXT:    s_mov_b32 s2, s4
1487; MOVREL-NEXT:    s_mov_b32 s3, s5
1488; MOVREL-NEXT:    s_mov_b32 s4, s6
1489; MOVREL-NEXT:    s_mov_b32 s5, s7
1490; MOVREL-NEXT:    s_mov_b32 s6, s8
1491; MOVREL-NEXT:    s_mov_b32 s7, s9
1492; MOVREL-NEXT:    s_movrels_b32 s0, s0
1493; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
1494; MOVREL-NEXT:    s_mov_b32 m0, -1
1495; MOVREL-NEXT:    ds_write_b32 v0, v0
1496; MOVREL-NEXT:    s_endpgm
1497;
1498; GFX10-LABEL: dyn_extract_v8p3_s_s:
1499; GFX10:       ; %bb.0: ; %entry
1500; GFX10-NEXT:    s_mov_b32 s0, s2
1501; GFX10-NEXT:    s_mov_b32 m0, s10
1502; GFX10-NEXT:    s_mov_b32 s1, s3
1503; GFX10-NEXT:    s_mov_b32 s2, s4
1504; GFX10-NEXT:    s_mov_b32 s3, s5
1505; GFX10-NEXT:    s_mov_b32 s4, s6
1506; GFX10-NEXT:    s_mov_b32 s5, s7
1507; GFX10-NEXT:    s_mov_b32 s6, s8
1508; GFX10-NEXT:    s_mov_b32 s7, s9
1509; GFX10-NEXT:    s_movrels_b32 s0, s0
1510; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1511; GFX10-NEXT:    ds_write_b32 v0, v0
1512; GFX10-NEXT:    s_endpgm
1513entry:
1514  %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
1515  store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef
1516  ret void
1517}
1518
1519define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) {
1520; GCN-LABEL: dyn_extract_v8p1_v_v:
1521; GCN:       ; %bb.0: ; %entry
1522; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1523; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1524; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1525; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1526; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1527; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1528; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1529; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1530; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1531; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1532; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1533; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1534; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1535; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1536; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1537; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1538; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1539; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1540; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1541; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1542; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1543; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1544; GCN-NEXT:    s_setpc_b64 s[30:31]
1545;
1546; GFX10-LABEL: dyn_extract_v8p1_v_v:
1547; GFX10:       ; %bb.0: ; %entry
1548; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1549; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1550; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1551; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1552; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1553; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1554; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1555; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1556; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1557; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1558; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1559; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1560; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1561; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1562; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1563; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1564; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1565; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1566; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1567; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1568; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1569; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1570; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1571; GFX10-NEXT:    s_setpc_b64 s[30:31]
1572entry:
1573  %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
1574  ret i8 addrspace(1)* %ext
1575}
1576
1577define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) {
1578; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
1579; GPRIDX:       ; %bb.0: ; %entry
1580; GPRIDX-NEXT:    s_mov_b32 s0, s2
1581; GPRIDX-NEXT:    s_mov_b32 s1, s3
1582; GPRIDX-NEXT:    s_mov_b32 m0, s18
1583; GPRIDX-NEXT:    s_mov_b32 s2, s4
1584; GPRIDX-NEXT:    s_mov_b32 s3, s5
1585; GPRIDX-NEXT:    s_mov_b32 s4, s6
1586; GPRIDX-NEXT:    s_mov_b32 s5, s7
1587; GPRIDX-NEXT:    s_mov_b32 s6, s8
1588; GPRIDX-NEXT:    s_mov_b32 s7, s9
1589; GPRIDX-NEXT:    s_mov_b32 s8, s10
1590; GPRIDX-NEXT:    s_mov_b32 s9, s11
1591; GPRIDX-NEXT:    s_mov_b32 s10, s12
1592; GPRIDX-NEXT:    s_mov_b32 s11, s13
1593; GPRIDX-NEXT:    s_mov_b32 s12, s14
1594; GPRIDX-NEXT:    s_mov_b32 s13, s15
1595; GPRIDX-NEXT:    s_mov_b32 s14, s16
1596; GPRIDX-NEXT:    s_mov_b32 s15, s17
1597; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1598; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1599; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
1600; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1601; GPRIDX-NEXT:    s_endpgm
1602;
1603; MOVREL-LABEL: dyn_extract_v8p1_s_s:
1604; MOVREL:       ; %bb.0: ; %entry
1605; MOVREL-NEXT:    s_mov_b32 s0, s2
1606; MOVREL-NEXT:    s_mov_b32 s1, s3
1607; MOVREL-NEXT:    s_mov_b32 m0, s18
1608; MOVREL-NEXT:    s_mov_b32 s2, s4
1609; MOVREL-NEXT:    s_mov_b32 s3, s5
1610; MOVREL-NEXT:    s_mov_b32 s4, s6
1611; MOVREL-NEXT:    s_mov_b32 s5, s7
1612; MOVREL-NEXT:    s_mov_b32 s6, s8
1613; MOVREL-NEXT:    s_mov_b32 s7, s9
1614; MOVREL-NEXT:    s_mov_b32 s8, s10
1615; MOVREL-NEXT:    s_mov_b32 s9, s11
1616; MOVREL-NEXT:    s_mov_b32 s10, s12
1617; MOVREL-NEXT:    s_mov_b32 s11, s13
1618; MOVREL-NEXT:    s_mov_b32 s12, s14
1619; MOVREL-NEXT:    s_mov_b32 s13, s15
1620; MOVREL-NEXT:    s_mov_b32 s14, s16
1621; MOVREL-NEXT:    s_mov_b32 s15, s17
1622; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1623; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
1624; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
1625; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1626; MOVREL-NEXT:    s_endpgm
1627;
1628; GFX10-LABEL: dyn_extract_v8p1_s_s:
1629; GFX10:       ; %bb.0: ; %entry
1630; GFX10-NEXT:    s_mov_b32 s0, s2
1631; GFX10-NEXT:    s_mov_b32 s1, s3
1632; GFX10-NEXT:    s_mov_b32 m0, s18
1633; GFX10-NEXT:    s_mov_b32 s2, s4
1634; GFX10-NEXT:    s_mov_b32 s3, s5
1635; GFX10-NEXT:    s_mov_b32 s4, s6
1636; GFX10-NEXT:    s_mov_b32 s5, s7
1637; GFX10-NEXT:    s_mov_b32 s6, s8
1638; GFX10-NEXT:    s_mov_b32 s7, s9
1639; GFX10-NEXT:    s_mov_b32 s8, s10
1640; GFX10-NEXT:    s_mov_b32 s9, s11
1641; GFX10-NEXT:    s_mov_b32 s10, s12
1642; GFX10-NEXT:    s_mov_b32 s11, s13
1643; GFX10-NEXT:    s_mov_b32 s12, s14
1644; GFX10-NEXT:    s_mov_b32 s13, s15
1645; GFX10-NEXT:    s_mov_b32 s14, s16
1646; GFX10-NEXT:    s_mov_b32 s15, s17
1647; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1648; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1649; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1650; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1651; GFX10-NEXT:    s_endpgm
1652entry:
1653  %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
1654  store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef
1655  ret void
1656}
1657
1658define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) {
1659; GPRIDX-LABEL: dyn_extract_v16f32_v_s:
1660; GPRIDX:       ; %bb.0: ; %entry
1661; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
1662; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
1663; GPRIDX-NEXT:    s_set_gpr_idx_off
1664; GPRIDX-NEXT:    ; return to shader part epilog
1665;
1666; MOVREL-LABEL: dyn_extract_v16f32_v_s:
1667; MOVREL:       ; %bb.0: ; %entry
1668; MOVREL-NEXT:    s_mov_b32 m0, s2
1669; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
1670; MOVREL-NEXT:    ; return to shader part epilog
1671;
1672; GFX10-LABEL: dyn_extract_v16f32_v_s:
1673; GFX10:       ; %bb.0: ; %entry
1674; GFX10-NEXT:    s_mov_b32 m0, s2
1675; GFX10-NEXT:    v_movrels_b32_e32 v0, v0
1676; GFX10-NEXT:    ; return to shader part epilog
1677entry:
1678  %ext = extractelement <16 x float> %vec, i32 %sel
1679  ret float %ext
1680}
1681
1682define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) {
1683; GPRIDX-LABEL: dyn_extract_v32f32_v_s:
1684; GPRIDX:       ; %bb.0: ; %entry
1685; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
1686; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
1687; GPRIDX-NEXT:    s_set_gpr_idx_off
1688; GPRIDX-NEXT:    ; return to shader part epilog
1689;
1690; MOVREL-LABEL: dyn_extract_v32f32_v_s:
1691; MOVREL:       ; %bb.0: ; %entry
1692; MOVREL-NEXT:    s_mov_b32 m0, s2
1693; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
1694; MOVREL-NEXT:    ; return to shader part epilog
1695;
1696; GFX10-LABEL: dyn_extract_v32f32_v_s:
1697; GFX10:       ; %bb.0: ; %entry
1698; GFX10-NEXT:    s_mov_b32 m0, s2
1699; GFX10-NEXT:    v_movrels_b32_e32 v0, v0
1700; GFX10-NEXT:    ; return to shader part epilog
1701entry:
1702  %ext = extractelement <32 x float> %vec, i32 %sel
1703  ret float %ext
1704}
1705
1706define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) {
1707; GPRIDX-LABEL: dyn_extract_v16f64_v_s:
1708; GPRIDX:       ; %bb.0: ; %entry
1709; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
1710; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
1711; GPRIDX-NEXT:    v_mov_b32_e32 v32, v0
1712; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
1713; GPRIDX-NEXT:    s_set_gpr_idx_off
1714; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v32
1715; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
1716; GPRIDX-NEXT:    ; return to shader part epilog
1717;
1718; MOVREL-LABEL: dyn_extract_v16f64_v_s:
1719; MOVREL:       ; %bb.0: ; %entry
1720; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
1721; MOVREL-NEXT:    v_movrels_b32_e32 v32, v0
1722; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
1723; MOVREL-NEXT:    v_readfirstlane_b32 s0, v32
1724; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
1725; MOVREL-NEXT:    ; return to shader part epilog
1726;
1727; GFX10-LABEL: dyn_extract_v16f64_v_s:
1728; GFX10:       ; %bb.0: ; %entry
1729; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
1730; GFX10-NEXT:    v_movrels_b32_e32 v32, v0
1731; GFX10-NEXT:    v_movrels_b32_e32 v0, v1
1732; GFX10-NEXT:    v_readfirstlane_b32 s0, v32
1733; GFX10-NEXT:    v_readfirstlane_b32 s1, v0
1734; GFX10-NEXT:    ; return to shader part epilog
1735entry:
1736  %ext = extractelement <16 x double> %vec, i32 %sel
1737  ret double %ext
1738}
1739
1740define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) {
1741; GCN-LABEL: dyn_extract_v16f32_s_s:
1742; GCN:       ; %bb.0: ; %entry
1743; GCN-NEXT:    s_mov_b32 s4, 1.0
1744; GCN-NEXT:    s_mov_b32 m0, s2
1745; GCN-NEXT:    s_mov_b32 s19, 0x41800000
1746; GCN-NEXT:    s_mov_b32 s18, 0x41700000
1747; GCN-NEXT:    s_mov_b32 s17, 0x41600000
1748; GCN-NEXT:    s_mov_b32 s16, 0x41500000
1749; GCN-NEXT:    s_mov_b32 s15, 0x41400000
1750; GCN-NEXT:    s_mov_b32 s14, 0x41300000
1751; GCN-NEXT:    s_mov_b32 s13, 0x41200000
1752; GCN-NEXT:    s_mov_b32 s12, 0x41100000
1753; GCN-NEXT:    s_mov_b32 s11, 0x41000000
1754; GCN-NEXT:    s_mov_b32 s10, 0x40e00000
1755; GCN-NEXT:    s_mov_b32 s9, 0x40c00000
1756; GCN-NEXT:    s_mov_b32 s8, 0x40a00000
1757; GCN-NEXT:    s_mov_b32 s7, 4.0
1758; GCN-NEXT:    s_mov_b32 s6, 0x40400000
1759; GCN-NEXT:    s_mov_b32 s5, 2.0
1760; GCN-NEXT:    s_movrels_b32 s0, s4
1761; GCN-NEXT:    v_mov_b32_e32 v0, s0
1762; GCN-NEXT:    ; return to shader part epilog
1763;
1764; GFX10-LABEL: dyn_extract_v16f32_s_s:
1765; GFX10:       ; %bb.0: ; %entry
1766; GFX10-NEXT:    s_mov_b32 s4, 1.0
1767; GFX10-NEXT:    s_mov_b32 m0, s2
1768; GFX10-NEXT:    s_mov_b32 s19, 0x41800000
1769; GFX10-NEXT:    s_mov_b32 s18, 0x41700000
1770; GFX10-NEXT:    s_mov_b32 s17, 0x41600000
1771; GFX10-NEXT:    s_mov_b32 s16, 0x41500000
1772; GFX10-NEXT:    s_mov_b32 s15, 0x41400000
1773; GFX10-NEXT:    s_mov_b32 s14, 0x41300000
1774; GFX10-NEXT:    s_mov_b32 s13, 0x41200000
1775; GFX10-NEXT:    s_mov_b32 s12, 0x41100000
1776; GFX10-NEXT:    s_mov_b32 s11, 0x41000000
1777; GFX10-NEXT:    s_mov_b32 s10, 0x40e00000
1778; GFX10-NEXT:    s_mov_b32 s9, 0x40c00000
1779; GFX10-NEXT:    s_mov_b32 s8, 0x40a00000
1780; GFX10-NEXT:    s_mov_b32 s7, 4.0
1781; GFX10-NEXT:    s_mov_b32 s6, 0x40400000
1782; GFX10-NEXT:    s_mov_b32 s5, 2.0
1783; GFX10-NEXT:    s_movrels_b32 s0, s4
1784; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1785; GFX10-NEXT:    ; return to shader part epilog
1786entry:
1787  %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
1788  ret float %ext
1789}
1790
1791define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) {
1792; GCN-LABEL: dyn_extract_v32f32_s_s:
1793; GCN:       ; %bb.0: ; %entry
1794; GCN-NEXT:    s_mov_b32 s36, 1.0
1795; GCN-NEXT:    s_mov_b32 m0, s2
1796; GCN-NEXT:    s_mov_b32 s67, 0x42000000
1797; GCN-NEXT:    s_mov_b32 s66, 0x41f80000
1798; GCN-NEXT:    s_mov_b32 s65, 0x41f00000
1799; GCN-NEXT:    s_mov_b32 s64, 0x41e80000
1800; GCN-NEXT:    s_mov_b32 s63, 0x41e00000
1801; GCN-NEXT:    s_mov_b32 s62, 0x41d80000
1802; GCN-NEXT:    s_mov_b32 s61, 0x41d00000
1803; GCN-NEXT:    s_mov_b32 s60, 0x41c80000
1804; GCN-NEXT:    s_mov_b32 s59, 0x41c00000
1805; GCN-NEXT:    s_mov_b32 s58, 0x41b80000
1806; GCN-NEXT:    s_mov_b32 s57, 0x41b00000
1807; GCN-NEXT:    s_mov_b32 s56, 0x41a80000
1808; GCN-NEXT:    s_mov_b32 s55, 0x41a00000
1809; GCN-NEXT:    s_mov_b32 s54, 0x41980000
1810; GCN-NEXT:    s_mov_b32 s53, 0x41900000
1811; GCN-NEXT:    s_mov_b32 s52, 0x41880000
1812; GCN-NEXT:    s_mov_b32 s51, 0x41800000
1813; GCN-NEXT:    s_mov_b32 s50, 0x41700000
1814; GCN-NEXT:    s_mov_b32 s49, 0x41600000
1815; GCN-NEXT:    s_mov_b32 s48, 0x41500000
1816; GCN-NEXT:    s_mov_b32 s47, 0x41400000
1817; GCN-NEXT:    s_mov_b32 s46, 0x41300000
1818; GCN-NEXT:    s_mov_b32 s45, 0x41200000
1819; GCN-NEXT:    s_mov_b32 s44, 0x41100000
1820; GCN-NEXT:    s_mov_b32 s43, 0x41000000
1821; GCN-NEXT:    s_mov_b32 s42, 0x40e00000
1822; GCN-NEXT:    s_mov_b32 s41, 0x40c00000
1823; GCN-NEXT:    s_mov_b32 s40, 0x40a00000
1824; GCN-NEXT:    s_mov_b32 s39, 4.0
1825; GCN-NEXT:    s_mov_b32 s38, 0x40400000
1826; GCN-NEXT:    s_mov_b32 s37, 2.0
1827; GCN-NEXT:    s_movrels_b32 s0, s36
1828; GCN-NEXT:    v_mov_b32_e32 v0, s0
1829; GCN-NEXT:    ; return to shader part epilog
1830;
1831; GFX10-LABEL: dyn_extract_v32f32_s_s:
1832; GFX10:       ; %bb.0: ; %entry
1833; GFX10-NEXT:    s_mov_b32 s36, 1.0
1834; GFX10-NEXT:    s_mov_b32 m0, s2
1835; GFX10-NEXT:    s_mov_b32 s67, 0x42000000
1836; GFX10-NEXT:    s_mov_b32 s66, 0x41f80000
1837; GFX10-NEXT:    s_mov_b32 s65, 0x41f00000
1838; GFX10-NEXT:    s_mov_b32 s64, 0x41e80000
1839; GFX10-NEXT:    s_mov_b32 s63, 0x41e00000
1840; GFX10-NEXT:    s_mov_b32 s62, 0x41d80000
1841; GFX10-NEXT:    s_mov_b32 s61, 0x41d00000
1842; GFX10-NEXT:    s_mov_b32 s60, 0x41c80000
1843; GFX10-NEXT:    s_mov_b32 s59, 0x41c00000
1844; GFX10-NEXT:    s_mov_b32 s58, 0x41b80000
1845; GFX10-NEXT:    s_mov_b32 s57, 0x41b00000
1846; GFX10-NEXT:    s_mov_b32 s56, 0x41a80000
1847; GFX10-NEXT:    s_mov_b32 s55, 0x41a00000
1848; GFX10-NEXT:    s_mov_b32 s54, 0x41980000
1849; GFX10-NEXT:    s_mov_b32 s53, 0x41900000
1850; GFX10-NEXT:    s_mov_b32 s52, 0x41880000
1851; GFX10-NEXT:    s_mov_b32 s51, 0x41800000
1852; GFX10-NEXT:    s_mov_b32 s50, 0x41700000
1853; GFX10-NEXT:    s_mov_b32 s49, 0x41600000
1854; GFX10-NEXT:    s_mov_b32 s48, 0x41500000
1855; GFX10-NEXT:    s_mov_b32 s47, 0x41400000
1856; GFX10-NEXT:    s_mov_b32 s46, 0x41300000
1857; GFX10-NEXT:    s_mov_b32 s45, 0x41200000
1858; GFX10-NEXT:    s_mov_b32 s44, 0x41100000
1859; GFX10-NEXT:    s_mov_b32 s43, 0x41000000
1860; GFX10-NEXT:    s_mov_b32 s42, 0x40e00000
1861; GFX10-NEXT:    s_mov_b32 s41, 0x40c00000
1862; GFX10-NEXT:    s_mov_b32 s40, 0x40a00000
1863; GFX10-NEXT:    s_mov_b32 s39, 4.0
1864; GFX10-NEXT:    s_mov_b32 s38, 0x40400000
1865; GFX10-NEXT:    s_mov_b32 s37, 2.0
1866; GFX10-NEXT:    s_movrels_b32 s0, s36
1867; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1868; GFX10-NEXT:    ; return to shader part epilog
1869entry:
1870  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
1871  ret float %ext
1872}
1873
1874define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) {
1875; GCN-LABEL: dyn_extract_v16f64_s_s:
1876; GCN:       ; %bb.0: ; %entry
1877; GCN-NEXT:    s_mov_b32 s66, 0
1878; GCN-NEXT:    s_mov_b64 s[36:37], 1.0
1879; GCN-NEXT:    s_mov_b32 m0, s2
1880; GCN-NEXT:    s_mov_b32 s67, 0x40300000
1881; GCN-NEXT:    s_mov_b32 s65, 0x402e0000
1882; GCN-NEXT:    s_mov_b32 s64, s66
1883; GCN-NEXT:    s_mov_b32 s63, 0x402c0000
1884; GCN-NEXT:    s_mov_b32 s62, s66
1885; GCN-NEXT:    s_mov_b32 s61, 0x402a0000
1886; GCN-NEXT:    s_mov_b32 s60, s66
1887; GCN-NEXT:    s_mov_b32 s59, 0x40280000
1888; GCN-NEXT:    s_mov_b32 s58, s66
1889; GCN-NEXT:    s_mov_b32 s57, 0x40260000
1890; GCN-NEXT:    s_mov_b32 s56, s66
1891; GCN-NEXT:    s_mov_b32 s55, 0x40240000
1892; GCN-NEXT:    s_mov_b32 s54, s66
1893; GCN-NEXT:    s_mov_b32 s53, 0x40220000
1894; GCN-NEXT:    s_mov_b32 s52, s66
1895; GCN-NEXT:    s_mov_b32 s51, 0x40200000
1896; GCN-NEXT:    s_mov_b32 s50, s66
1897; GCN-NEXT:    s_mov_b32 s49, 0x401c0000
1898; GCN-NEXT:    s_mov_b32 s48, s66
1899; GCN-NEXT:    s_mov_b32 s47, 0x40180000
1900; GCN-NEXT:    s_mov_b32 s46, s66
1901; GCN-NEXT:    s_mov_b32 s45, 0x40140000
1902; GCN-NEXT:    s_mov_b32 s44, s66
1903; GCN-NEXT:    s_mov_b64 s[42:43], 4.0
1904; GCN-NEXT:    s_mov_b32 s41, 0x40080000
1905; GCN-NEXT:    s_mov_b32 s40, s66
1906; GCN-NEXT:    s_mov_b64 s[38:39], 2.0
1907; GCN-NEXT:    s_movrels_b64 s[0:1], s[36:37]
1908; GCN-NEXT:    ; return to shader part epilog
1909;
1910; GFX10-LABEL: dyn_extract_v16f64_s_s:
1911; GFX10:       ; %bb.0: ; %entry
1912; GFX10-NEXT:    s_mov_b32 s66, 0
1913; GFX10-NEXT:    s_mov_b64 s[36:37], 1.0
1914; GFX10-NEXT:    s_mov_b32 m0, s2
1915; GFX10-NEXT:    s_mov_b32 s67, 0x40300000
1916; GFX10-NEXT:    s_mov_b32 s65, 0x402e0000
1917; GFX10-NEXT:    s_mov_b32 s64, s66
1918; GFX10-NEXT:    s_mov_b32 s63, 0x402c0000
1919; GFX10-NEXT:    s_mov_b32 s62, s66
1920; GFX10-NEXT:    s_mov_b32 s61, 0x402a0000
1921; GFX10-NEXT:    s_mov_b32 s60, s66
1922; GFX10-NEXT:    s_mov_b32 s59, 0x40280000
1923; GFX10-NEXT:    s_mov_b32 s58, s66
1924; GFX10-NEXT:    s_mov_b32 s57, 0x40260000
1925; GFX10-NEXT:    s_mov_b32 s56, s66
1926; GFX10-NEXT:    s_mov_b32 s55, 0x40240000
1927; GFX10-NEXT:    s_mov_b32 s54, s66
1928; GFX10-NEXT:    s_mov_b32 s53, 0x40220000
1929; GFX10-NEXT:    s_mov_b32 s52, s66
1930; GFX10-NEXT:    s_mov_b32 s51, 0x40200000
1931; GFX10-NEXT:    s_mov_b32 s50, s66
1932; GFX10-NEXT:    s_mov_b32 s49, 0x401c0000
1933; GFX10-NEXT:    s_mov_b32 s48, s66
1934; GFX10-NEXT:    s_mov_b32 s47, 0x40180000
1935; GFX10-NEXT:    s_mov_b32 s46, s66
1936; GFX10-NEXT:    s_mov_b32 s45, 0x40140000
1937; GFX10-NEXT:    s_mov_b32 s44, s66
1938; GFX10-NEXT:    s_mov_b64 s[42:43], 4.0
1939; GFX10-NEXT:    s_mov_b32 s41, 0x40080000
1940; GFX10-NEXT:    s_mov_b32 s40, s66
1941; GFX10-NEXT:    s_mov_b64 s[38:39], 2.0
1942; GFX10-NEXT:    s_movrels_b64 s[0:1], s[36:37]
1943; GFX10-NEXT:    ; return to shader part epilog
1944entry:
1945  %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
1946  ret double %ext
1947}
1948
1949define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) {
1950; GCN-LABEL: dyn_extract_v6f32_s_v:
1951; GCN:       ; %bb.0: ; %entry
1952; GCN-NEXT:    s_mov_b32 s0, s2
1953; GCN-NEXT:    s_mov_b32 s1, s3
1954; GCN-NEXT:    v_mov_b32_e32 v1, s0
1955; GCN-NEXT:    v_mov_b32_e32 v2, s1
1956; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
1957; GCN-NEXT:    v_mov_b32_e32 v3, s4
1958; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1959; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
1960; GCN-NEXT:    v_mov_b32_e32 v4, s5
1961; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1962; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
1963; GCN-NEXT:    v_mov_b32_e32 v5, s6
1964; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
1965; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
1966; GCN-NEXT:    v_mov_b32_e32 v6, s7
1967; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1968; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
1969; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v6, vcc
1970; GCN-NEXT:    ; return to shader part epilog
1971;
1972; GFX10-LABEL: dyn_extract_v6f32_s_v:
1973; GFX10:       ; %bb.0: ; %entry
1974; GFX10-NEXT:    s_mov_b32 s1, s3
1975; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
1976; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1977; GFX10-NEXT:    s_mov_b32 s0, s2
1978; GFX10-NEXT:    s_mov_b32 s2, s4
1979; GFX10-NEXT:    s_mov_b32 s3, s5
1980; GFX10-NEXT:    s_mov_b32 s4, s6
1981; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
1982; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
1983; GFX10-NEXT:    s_mov_b32 s5, s7
1984; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
1985; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
1986; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
1987; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
1988; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
1989; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
1990; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s5, vcc_lo
1991; GFX10-NEXT:    ; return to shader part epilog
1992entry:
1993  %ext = extractelement <6 x float> %vec, i32 %sel
1994  ret float %ext
1995}
1996
1997define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) {
1998; GCN-LABEL: dyn_extract_v6f32_v_v:
1999; GCN:       ; %bb.0: ; %entry
2000; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2001; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v6
2002; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2003; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v6
2004; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2005; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v6
2006; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2007; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v6
2008; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2009; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v6
2010; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2011; GCN-NEXT:    s_setpc_b64 s[30:31]
2012;
2013; GFX10-LABEL: dyn_extract_v6f32_v_v:
2014; GFX10:       ; %bb.0: ; %entry
2015; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2016; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2017; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
2018; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2019; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v6
2020; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2021; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v6
2022; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2023; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v6
2024; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2025; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v6
2026; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2027; GFX10-NEXT:    s_setpc_b64 s[30:31]
2028entry:
2029  %ext = extractelement <6 x float> %vec, i32 %sel
2030  ret float %ext
2031}
2032
2033define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) {
2034; GCN-LABEL: dyn_extract_v6f32_v_s:
2035; GCN:       ; %bb.0: ; %entry
2036; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
2037; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2038; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
2039; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2040; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
2041; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2042; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
2043; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2044; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
2045; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2046; GCN-NEXT:    ; return to shader part epilog
2047;
2048; GFX10-LABEL: dyn_extract_v6f32_v_s:
2049; GFX10:       ; %bb.0: ; %entry
2050; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
2051; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2052; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
2053; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2054; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
2055; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2056; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
2057; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2058; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
2059; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2060; GFX10-NEXT:    ; return to shader part epilog
2061entry:
2062  %ext = extractelement <6 x float> %vec, i32 %sel
2063  ret float %ext
2064}
2065
2066define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) {
2067; GCN-LABEL: dyn_extract_v6f32_s_s:
2068; GCN:       ; %bb.0: ; %entry
2069; GCN-NEXT:    s_cmp_eq_u32 s8, 1
2070; GCN-NEXT:    s_cselect_b32 s0, s3, s2
2071; GCN-NEXT:    s_cmp_eq_u32 s8, 2
2072; GCN-NEXT:    s_cselect_b32 s0, s4, s0
2073; GCN-NEXT:    s_cmp_eq_u32 s8, 3
2074; GCN-NEXT:    s_cselect_b32 s0, s5, s0
2075; GCN-NEXT:    s_cmp_eq_u32 s8, 4
2076; GCN-NEXT:    s_cselect_b32 s0, s6, s0
2077; GCN-NEXT:    s_cmp_eq_u32 s8, 5
2078; GCN-NEXT:    s_cselect_b32 s0, s7, s0
2079; GCN-NEXT:    v_mov_b32_e32 v0, s0
2080; GCN-NEXT:    ; return to shader part epilog
2081;
2082; GFX10-LABEL: dyn_extract_v6f32_s_s:
2083; GFX10:       ; %bb.0: ; %entry
2084; GFX10-NEXT:    s_cmp_eq_u32 s8, 1
2085; GFX10-NEXT:    s_cselect_b32 s0, s3, s2
2086; GFX10-NEXT:    s_cmp_eq_u32 s8, 2
2087; GFX10-NEXT:    s_cselect_b32 s0, s4, s0
2088; GFX10-NEXT:    s_cmp_eq_u32 s8, 3
2089; GFX10-NEXT:    s_cselect_b32 s0, s5, s0
2090; GFX10-NEXT:    s_cmp_eq_u32 s8, 4
2091; GFX10-NEXT:    s_cselect_b32 s0, s6, s0
2092; GFX10-NEXT:    s_cmp_eq_u32 s8, 5
2093; GFX10-NEXT:    s_cselect_b32 s0, s7, s0
2094; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2095; GFX10-NEXT:    ; return to shader part epilog
2096entry:
2097  %ext = extractelement <6 x float> %vec, i32 %sel
2098  ret float %ext
2099}
2100
2101define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) {
2102; GCN-LABEL: dyn_extract_v7f32_s_v:
2103; GCN:       ; %bb.0: ; %entry
2104; GCN-NEXT:    s_mov_b32 s0, s2
2105; GCN-NEXT:    s_mov_b32 s1, s3
2106; GCN-NEXT:    s_mov_b32 s2, s4
2107; GCN-NEXT:    v_mov_b32_e32 v1, s0
2108; GCN-NEXT:    v_mov_b32_e32 v2, s1
2109; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2110; GCN-NEXT:    v_mov_b32_e32 v3, s2
2111; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2112; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2113; GCN-NEXT:    v_mov_b32_e32 v4, s5
2114; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2115; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2116; GCN-NEXT:    v_mov_b32_e32 v5, s6
2117; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2118; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2119; GCN-NEXT:    v_mov_b32_e32 v6, s7
2120; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2121; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2122; GCN-NEXT:    v_mov_b32_e32 v7, s8
2123; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
2124; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2125; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v7, vcc
2126; GCN-NEXT:    ; return to shader part epilog
2127;
2128; GFX10-LABEL: dyn_extract_v7f32_s_v:
2129; GFX10:       ; %bb.0: ; %entry
2130; GFX10-NEXT:    s_mov_b32 s1, s3
2131; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2132; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2133; GFX10-NEXT:    s_mov_b32 s0, s2
2134; GFX10-NEXT:    s_mov_b32 s2, s4
2135; GFX10-NEXT:    s_mov_b32 s3, s5
2136; GFX10-NEXT:    s_mov_b32 s4, s6
2137; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2138; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2139; GFX10-NEXT:    s_mov_b32 s5, s7
2140; GFX10-NEXT:    s_mov_b32 s6, s8
2141; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
2142; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2143; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
2144; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2145; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2146; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2147; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
2148; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2149; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s6, vcc_lo
2150; GFX10-NEXT:    ; return to shader part epilog
2151entry:
2152  %ext = extractelement <7 x float> %vec, i32 %sel
2153  ret float %ext
2154}
2155
2156define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) {
2157; GCN-LABEL: dyn_extract_v7f32_v_v:
2158; GCN:       ; %bb.0: ; %entry
2159; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2160; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v7
2161; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2162; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v7
2163; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2164; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
2165; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2166; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v7
2167; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2168; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v7
2169; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2170; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v7
2171; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2172; GCN-NEXT:    s_setpc_b64 s[30:31]
2173;
2174; GFX10-LABEL: dyn_extract_v7f32_v_v:
2175; GFX10:       ; %bb.0: ; %entry
2176; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2177; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2178; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
2179; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2180; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v7
2181; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2182; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v7
2183; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2184; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v7
2185; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2186; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v7
2187; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2188; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v7
2189; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2190; GFX10-NEXT:    s_setpc_b64 s[30:31]
2191entry:
2192  %ext = extractelement <7 x float> %vec, i32 %sel
2193  ret float %ext
2194}
2195
2196define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) {
2197; GCN-LABEL: dyn_extract_v7f32_v_s:
2198; GCN:       ; %bb.0: ; %entry
2199; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
2200; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2201; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
2202; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2203; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
2204; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2205; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
2206; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2207; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
2208; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2209; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
2210; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2211; GCN-NEXT:    ; return to shader part epilog
2212;
2213; GFX10-LABEL: dyn_extract_v7f32_v_s:
2214; GFX10:       ; %bb.0: ; %entry
2215; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
2216; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2217; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
2218; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2219; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
2220; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2221; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
2222; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2223; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
2224; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2225; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
2226; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2227; GFX10-NEXT:    ; return to shader part epilog
2228entry:
2229  %ext = extractelement <7 x float> %vec, i32 %sel
2230  ret float %ext
2231}
2232
2233define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) {
2234; GCN-LABEL: dyn_extract_v7f32_s_s:
2235; GCN:       ; %bb.0: ; %entry
2236; GCN-NEXT:    s_cmp_eq_u32 s9, 1
2237; GCN-NEXT:    s_cselect_b32 s0, s3, s2
2238; GCN-NEXT:    s_cmp_eq_u32 s9, 2
2239; GCN-NEXT:    s_cselect_b32 s0, s4, s0
2240; GCN-NEXT:    s_cmp_eq_u32 s9, 3
2241; GCN-NEXT:    s_cselect_b32 s0, s5, s0
2242; GCN-NEXT:    s_cmp_eq_u32 s9, 4
2243; GCN-NEXT:    s_cselect_b32 s0, s6, s0
2244; GCN-NEXT:    s_cmp_eq_u32 s9, 5
2245; GCN-NEXT:    s_cselect_b32 s0, s7, s0
2246; GCN-NEXT:    s_cmp_eq_u32 s9, 6
2247; GCN-NEXT:    s_cselect_b32 s0, s8, s0
2248; GCN-NEXT:    v_mov_b32_e32 v0, s0
2249; GCN-NEXT:    ; return to shader part epilog
2250;
2251; GFX10-LABEL: dyn_extract_v7f32_s_s:
2252; GFX10:       ; %bb.0: ; %entry
2253; GFX10-NEXT:    s_cmp_eq_u32 s9, 1
2254; GFX10-NEXT:    s_cselect_b32 s0, s3, s2
2255; GFX10-NEXT:    s_cmp_eq_u32 s9, 2
2256; GFX10-NEXT:    s_cselect_b32 s0, s4, s0
2257; GFX10-NEXT:    s_cmp_eq_u32 s9, 3
2258; GFX10-NEXT:    s_cselect_b32 s0, s5, s0
2259; GFX10-NEXT:    s_cmp_eq_u32 s9, 4
2260; GFX10-NEXT:    s_cselect_b32 s0, s6, s0
2261; GFX10-NEXT:    s_cmp_eq_u32 s9, 5
2262; GFX10-NEXT:    s_cselect_b32 s0, s7, s0
2263; GFX10-NEXT:    s_cmp_eq_u32 s9, 6
2264; GFX10-NEXT:    s_cselect_b32 s0, s8, s0
2265; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2266; GFX10-NEXT:    ; return to shader part epilog
2267entry:
2268  %ext = extractelement <7 x float> %vec, i32 %sel
2269  ret float %ext
2270}
2271
2272define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) {
2273; GCN-LABEL: dyn_extract_v6f64_s_v:
2274; GCN:       ; %bb.0: ; %entry
2275; GCN-NEXT:    s_mov_b32 s0, s2
2276; GCN-NEXT:    s_mov_b32 s1, s3
2277; GCN-NEXT:    s_mov_b32 s2, s4
2278; GCN-NEXT:    s_mov_b32 s3, s5
2279; GCN-NEXT:    s_mov_b32 s4, s6
2280; GCN-NEXT:    s_mov_b32 s5, s7
2281; GCN-NEXT:    v_mov_b32_e32 v1, s0
2282; GCN-NEXT:    v_mov_b32_e32 v2, s1
2283; GCN-NEXT:    v_mov_b32_e32 v3, s2
2284; GCN-NEXT:    v_mov_b32_e32 v4, s3
2285; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2286; GCN-NEXT:    s_mov_b32 s6, s8
2287; GCN-NEXT:    s_mov_b32 s7, s9
2288; GCN-NEXT:    v_mov_b32_e32 v5, s4
2289; GCN-NEXT:    v_mov_b32_e32 v6, s5
2290; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2291; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2292; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2293; GCN-NEXT:    v_mov_b32_e32 v7, s6
2294; GCN-NEXT:    v_mov_b32_e32 v8, s7
2295; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2296; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2297; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2298; GCN-NEXT:    v_mov_b32_e32 v9, s10
2299; GCN-NEXT:    v_mov_b32_e32 v10, s11
2300; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2301; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2302; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2303; GCN-NEXT:    v_mov_b32_e32 v11, s12
2304; GCN-NEXT:    v_mov_b32_e32 v12, s13
2305; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2306; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
2307; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2308; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v11, vcc
2309; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v12, vcc
2310; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2311; GCN-NEXT:    v_readfirstlane_b32 s1, v1
2312; GCN-NEXT:    ; return to shader part epilog
2313;
2314; GFX10-LABEL: dyn_extract_v6f64_s_v:
2315; GFX10:       ; %bb.0: ; %entry
2316; GFX10-NEXT:    s_mov_b32 s0, s2
2317; GFX10-NEXT:    s_mov_b32 s2, s4
2318; GFX10-NEXT:    s_mov_b32 s15, s5
2319; GFX10-NEXT:    v_mov_b32_e32 v1, s2
2320; GFX10-NEXT:    v_mov_b32_e32 v2, s15
2321; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2322; GFX10-NEXT:    s_mov_b32 s1, s3
2323; GFX10-NEXT:    s_mov_b32 s4, s6
2324; GFX10-NEXT:    s_mov_b32 s5, s7
2325; GFX10-NEXT:    s_mov_b32 s6, s8
2326; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2327; GFX10-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2328; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2329; GFX10-NEXT:    s_mov_b32 s7, s9
2330; GFX10-NEXT:    s_mov_b32 s8, s10
2331; GFX10-NEXT:    s_mov_b32 s9, s11
2332; GFX10-NEXT:    s_mov_b32 s10, s12
2333; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2334; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2335; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2336; GFX10-NEXT:    s_mov_b32 s11, s13
2337; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2338; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2339; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2340; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2341; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2342; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2343; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s10, vcc_lo
2344; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s11, vcc_lo
2345; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2346; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
2347; GFX10-NEXT:    ; return to shader part epilog
2348entry:
2349  %ext = extractelement <6 x double> %vec, i32 %sel
2350  ret double %ext
2351}
2352
2353define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) {
2354; GCN-LABEL: dyn_extract_v6f64_v_v:
2355; GCN:       ; %bb.0: ; %entry
2356; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2357; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v12
2358; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2359; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2360; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v12
2361; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2362; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2363; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v12
2364; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2365; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2366; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v12
2367; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
2368; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2369; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v12
2370; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
2371; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2372; GCN-NEXT:    s_setpc_b64 s[30:31]
2373;
2374; GFX10-LABEL: dyn_extract_v6f64_v_v:
2375; GFX10:       ; %bb.0: ; %entry
2376; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2377; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2378; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
2379; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2380; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2381; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v12
2382; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2383; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2384; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v12
2385; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2386; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2387; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v12
2388; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2389; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2390; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v12
2391; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2392; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2393; GFX10-NEXT:    s_setpc_b64 s[30:31]
2394entry:
2395  %ext = extractelement <6 x double> %vec, i32 %sel
2396  ret double %ext
2397}
2398
2399define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) {
2400; GPRIDX-LABEL: dyn_extract_v6f64_v_s:
2401; GPRIDX:       ; %bb.0: ; %entry
2402; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
2403; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
2404; GPRIDX-NEXT:    v_mov_b32_e32 v12, v0
2405; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
2406; GPRIDX-NEXT:    s_set_gpr_idx_off
2407; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v12
2408; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
2409; GPRIDX-NEXT:    ; return to shader part epilog
2410;
2411; MOVREL-LABEL: dyn_extract_v6f64_v_s:
2412; MOVREL:       ; %bb.0: ; %entry
2413; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
2414; MOVREL-NEXT:    v_movrels_b32_e32 v12, v0
2415; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
2416; MOVREL-NEXT:    v_readfirstlane_b32 s0, v12
2417; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
2418; MOVREL-NEXT:    ; return to shader part epilog
2419;
2420; GFX10-LABEL: dyn_extract_v6f64_v_s:
2421; GFX10:       ; %bb.0: ; %entry
2422; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
2423; GFX10-NEXT:    v_movrels_b32_e32 v12, v0
2424; GFX10-NEXT:    v_movrels_b32_e32 v0, v1
2425; GFX10-NEXT:    v_readfirstlane_b32 s0, v12
2426; GFX10-NEXT:    v_readfirstlane_b32 s1, v0
2427; GFX10-NEXT:    ; return to shader part epilog
2428entry:
2429  %ext = extractelement <6 x double> %vec, i32 %sel
2430  ret double %ext
2431}
2432
2433define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) {
2434; GCN-LABEL: dyn_extract_v6f64_s_s:
2435; GCN:       ; %bb.0: ; %entry
2436; GCN-NEXT:    s_mov_b32 s0, s2
2437; GCN-NEXT:    s_mov_b32 s1, s3
2438; GCN-NEXT:    s_mov_b32 m0, s14
2439; GCN-NEXT:    s_mov_b32 s2, s4
2440; GCN-NEXT:    s_mov_b32 s3, s5
2441; GCN-NEXT:    s_mov_b32 s4, s6
2442; GCN-NEXT:    s_mov_b32 s5, s7
2443; GCN-NEXT:    s_mov_b32 s6, s8
2444; GCN-NEXT:    s_mov_b32 s7, s9
2445; GCN-NEXT:    s_mov_b32 s8, s10
2446; GCN-NEXT:    s_mov_b32 s9, s11
2447; GCN-NEXT:    s_mov_b32 s10, s12
2448; GCN-NEXT:    s_mov_b32 s11, s13
2449; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2450; GCN-NEXT:    ; return to shader part epilog
2451;
2452; GFX10-LABEL: dyn_extract_v6f64_s_s:
2453; GFX10:       ; %bb.0: ; %entry
2454; GFX10-NEXT:    s_mov_b32 s0, s2
2455; GFX10-NEXT:    s_mov_b32 s1, s3
2456; GFX10-NEXT:    s_mov_b32 m0, s14
2457; GFX10-NEXT:    s_mov_b32 s2, s4
2458; GFX10-NEXT:    s_mov_b32 s3, s5
2459; GFX10-NEXT:    s_mov_b32 s4, s6
2460; GFX10-NEXT:    s_mov_b32 s5, s7
2461; GFX10-NEXT:    s_mov_b32 s6, s8
2462; GFX10-NEXT:    s_mov_b32 s7, s9
2463; GFX10-NEXT:    s_mov_b32 s8, s10
2464; GFX10-NEXT:    s_mov_b32 s9, s11
2465; GFX10-NEXT:    s_mov_b32 s10, s12
2466; GFX10-NEXT:    s_mov_b32 s11, s13
2467; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2468; GFX10-NEXT:    ; return to shader part epilog
2469entry:
2470  %ext = extractelement <6 x double> %vec, i32 %sel
2471  ret double %ext
2472}
2473
2474define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) {
2475; GCN-LABEL: dyn_extract_v7f64_s_v:
2476; GCN:       ; %bb.0: ; %entry
2477; GCN-NEXT:    s_mov_b32 s0, s2
2478; GCN-NEXT:    s_mov_b32 s1, s3
2479; GCN-NEXT:    s_mov_b32 s2, s4
2480; GCN-NEXT:    s_mov_b32 s3, s5
2481; GCN-NEXT:    s_mov_b32 s4, s6
2482; GCN-NEXT:    s_mov_b32 s5, s7
2483; GCN-NEXT:    v_mov_b32_e32 v1, s0
2484; GCN-NEXT:    v_mov_b32_e32 v2, s1
2485; GCN-NEXT:    v_mov_b32_e32 v3, s2
2486; GCN-NEXT:    v_mov_b32_e32 v4, s3
2487; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2488; GCN-NEXT:    s_mov_b32 s6, s8
2489; GCN-NEXT:    s_mov_b32 s7, s9
2490; GCN-NEXT:    v_mov_b32_e32 v5, s4
2491; GCN-NEXT:    v_mov_b32_e32 v6, s5
2492; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2493; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2494; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2495; GCN-NEXT:    s_mov_b32 s8, s10
2496; GCN-NEXT:    s_mov_b32 s9, s11
2497; GCN-NEXT:    v_mov_b32_e32 v7, s6
2498; GCN-NEXT:    v_mov_b32_e32 v8, s7
2499; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2500; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2501; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2502; GCN-NEXT:    v_mov_b32_e32 v9, s8
2503; GCN-NEXT:    v_mov_b32_e32 v10, s9
2504; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2505; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2506; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2507; GCN-NEXT:    v_mov_b32_e32 v11, s12
2508; GCN-NEXT:    v_mov_b32_e32 v12, s13
2509; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2510; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
2511; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2512; GCN-NEXT:    v_mov_b32_e32 v13, s14
2513; GCN-NEXT:    v_mov_b32_e32 v14, s15
2514; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2515; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
2516; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2517; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v13, vcc
2518; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v14, vcc
2519; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2520; GCN-NEXT:    v_readfirstlane_b32 s1, v1
2521; GCN-NEXT:    ; return to shader part epilog
2522;
2523; GFX10-LABEL: dyn_extract_v7f64_s_v:
2524; GFX10:       ; %bb.0: ; %entry
2525; GFX10-NEXT:    s_mov_b32 s0, s2
2526; GFX10-NEXT:    s_mov_b32 s2, s4
2527; GFX10-NEXT:    s_mov_b32 s19, s5
2528; GFX10-NEXT:    v_mov_b32_e32 v1, s2
2529; GFX10-NEXT:    v_mov_b32_e32 v2, s19
2530; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2531; GFX10-NEXT:    s_mov_b32 s1, s3
2532; GFX10-NEXT:    s_mov_b32 s4, s6
2533; GFX10-NEXT:    s_mov_b32 s5, s7
2534; GFX10-NEXT:    s_mov_b32 s6, s8
2535; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2536; GFX10-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2537; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2538; GFX10-NEXT:    s_mov_b32 s7, s9
2539; GFX10-NEXT:    s_mov_b32 s8, s10
2540; GFX10-NEXT:    s_mov_b32 s9, s11
2541; GFX10-NEXT:    s_mov_b32 s10, s12
2542; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2543; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2544; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2545; GFX10-NEXT:    s_mov_b32 s11, s13
2546; GFX10-NEXT:    s_mov_b32 s12, s14
2547; GFX10-NEXT:    s_mov_b32 s13, s15
2548; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2549; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2550; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2551; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2552; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2553; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2554; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2555; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2556; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2557; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s12, vcc_lo
2558; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s13, vcc_lo
2559; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2560; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
2561; GFX10-NEXT:    ; return to shader part epilog
2562entry:
2563  %ext = extractelement <7 x double> %vec, i32 %sel
2564  ret double %ext
2565}
2566
2567define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
2568; GCN-LABEL: dyn_extract_v7f64_v_v:
2569; GCN:       ; %bb.0: ; %entry
2570; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2571; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v14
2572; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2573; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2574; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v14
2575; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2576; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2577; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v14
2578; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2579; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2580; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v14
2581; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
2582; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2583; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v14
2584; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
2585; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2586; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v14
2587; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
2588; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
2589; GCN-NEXT:    s_setpc_b64 s[30:31]
2590;
2591; GFX10-LABEL: dyn_extract_v7f64_v_v:
2592; GFX10:       ; %bb.0: ; %entry
2593; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2594; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2595; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
2596; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2597; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2598; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v14
2599; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2600; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2601; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v14
2602; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2603; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2604; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v14
2605; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2606; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2607; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v14
2608; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2609; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2610; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v14
2611; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
2612; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
2613; GFX10-NEXT:    s_setpc_b64 s[30:31]
2614entry:
2615  %ext = extractelement <7 x double> %vec, i32 %sel
2616  ret double %ext
2617}
2618
2619define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) {
2620; GPRIDX-LABEL: dyn_extract_v7f64_v_s:
2621; GPRIDX:       ; %bb.0: ; %entry
2622; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
2623; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
2624; GPRIDX-NEXT:    v_mov_b32_e32 v14, v0
2625; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
2626; GPRIDX-NEXT:    s_set_gpr_idx_off
2627; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v14
2628; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
2629; GPRIDX-NEXT:    ; return to shader part epilog
2630;
2631; MOVREL-LABEL: dyn_extract_v7f64_v_s:
2632; MOVREL:       ; %bb.0: ; %entry
2633; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
2634; MOVREL-NEXT:    v_movrels_b32_e32 v14, v0
2635; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
2636; MOVREL-NEXT:    v_readfirstlane_b32 s0, v14
2637; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
2638; MOVREL-NEXT:    ; return to shader part epilog
2639;
2640; GFX10-LABEL: dyn_extract_v7f64_v_s:
2641; GFX10:       ; %bb.0: ; %entry
2642; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
2643; GFX10-NEXT:    v_movrels_b32_e32 v14, v0
2644; GFX10-NEXT:    v_movrels_b32_e32 v0, v1
2645; GFX10-NEXT:    v_readfirstlane_b32 s0, v14
2646; GFX10-NEXT:    v_readfirstlane_b32 s1, v0
2647; GFX10-NEXT:    ; return to shader part epilog
2648entry:
2649  %ext = extractelement <7 x double> %vec, i32 %sel
2650  ret double %ext
2651}
2652
2653define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) {
2654; GCN-LABEL: dyn_extract_v7f64_s_s:
2655; GCN:       ; %bb.0: ; %entry
2656; GCN-NEXT:    s_mov_b32 s0, s2
2657; GCN-NEXT:    s_mov_b32 s1, s3
2658; GCN-NEXT:    s_mov_b32 m0, s16
2659; GCN-NEXT:    s_mov_b32 s2, s4
2660; GCN-NEXT:    s_mov_b32 s3, s5
2661; GCN-NEXT:    s_mov_b32 s4, s6
2662; GCN-NEXT:    s_mov_b32 s5, s7
2663; GCN-NEXT:    s_mov_b32 s6, s8
2664; GCN-NEXT:    s_mov_b32 s7, s9
2665; GCN-NEXT:    s_mov_b32 s8, s10
2666; GCN-NEXT:    s_mov_b32 s9, s11
2667; GCN-NEXT:    s_mov_b32 s10, s12
2668; GCN-NEXT:    s_mov_b32 s11, s13
2669; GCN-NEXT:    s_mov_b32 s12, s14
2670; GCN-NEXT:    s_mov_b32 s13, s15
2671; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2672; GCN-NEXT:    ; return to shader part epilog
2673;
2674; GFX10-LABEL: dyn_extract_v7f64_s_s:
2675; GFX10:       ; %bb.0: ; %entry
2676; GFX10-NEXT:    s_mov_b32 s0, s2
2677; GFX10-NEXT:    s_mov_b32 s1, s3
2678; GFX10-NEXT:    s_mov_b32 m0, s16
2679; GFX10-NEXT:    s_mov_b32 s2, s4
2680; GFX10-NEXT:    s_mov_b32 s3, s5
2681; GFX10-NEXT:    s_mov_b32 s4, s6
2682; GFX10-NEXT:    s_mov_b32 s5, s7
2683; GFX10-NEXT:    s_mov_b32 s6, s8
2684; GFX10-NEXT:    s_mov_b32 s7, s9
2685; GFX10-NEXT:    s_mov_b32 s8, s10
2686; GFX10-NEXT:    s_mov_b32 s9, s11
2687; GFX10-NEXT:    s_mov_b32 s10, s12
2688; GFX10-NEXT:    s_mov_b32 s11, s13
2689; GFX10-NEXT:    s_mov_b32 s12, s14
2690; GFX10-NEXT:    s_mov_b32 s13, s15
2691; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2692; GFX10-NEXT:    ; return to shader part epilog
2693entry:
2694  %ext = extractelement <7 x double> %vec, i32 %sel
2695  ret double %ext
2696}
2697
2698define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 %sel) {
2699; GPRIDX-LABEL: dyn_extract_v5f64_s_s:
2700; GPRIDX:         .amd_kernel_code_t
2701; GPRIDX-NEXT:     amd_code_version_major = 1
2702; GPRIDX-NEXT:     amd_code_version_minor = 2
2703; GPRIDX-NEXT:     amd_machine_kind = 1
2704; GPRIDX-NEXT:     amd_machine_version_major = 9
2705; GPRIDX-NEXT:     amd_machine_version_minor = 0
2706; GPRIDX-NEXT:     amd_machine_version_stepping = 0
2707; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
2708; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
2709; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
2710; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 1
2711; GPRIDX-NEXT:     priority = 0
2712; GPRIDX-NEXT:     float_mode = 240
2713; GPRIDX-NEXT:     priv = 0
2714; GPRIDX-NEXT:     enable_dx10_clamp = 1
2715; GPRIDX-NEXT:     debug_mode = 0
2716; GPRIDX-NEXT:     enable_ieee_mode = 1
2717; GPRIDX-NEXT:     enable_wgp_mode = 0
2718; GPRIDX-NEXT:     enable_mem_ordered = 0
2719; GPRIDX-NEXT:     enable_fwd_progress = 0
2720; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
2721; GPRIDX-NEXT:     user_sgpr_count = 6
2722; GPRIDX-NEXT:     enable_trap_handler = 0
2723; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
2724; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 0
2725; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 0
2726; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
2727; GPRIDX-NEXT:     enable_vgpr_workitem_id = 0
2728; GPRIDX-NEXT:     enable_exception_msb = 0
2729; GPRIDX-NEXT:     granulated_lds_size = 0
2730; GPRIDX-NEXT:     enable_exception = 0
2731; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
2732; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 0
2733; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 0
2734; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
2735; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 0
2736; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
2737; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
2738; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
2739; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
2740; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
2741; GPRIDX-NEXT:     enable_wavefront_size32 = 0
2742; GPRIDX-NEXT:     enable_ordered_append_gds = 0
2743; GPRIDX-NEXT:     private_element_size = 1
2744; GPRIDX-NEXT:     is_ptr64 = 1
2745; GPRIDX-NEXT:     is_dynamic_callstack = 0
2746; GPRIDX-NEXT:     is_debug_enabled = 0
2747; GPRIDX-NEXT:     is_xnack_enabled = 1
2748; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
2749; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
2750; GPRIDX-NEXT:     gds_segment_byte_size = 0
2751; GPRIDX-NEXT:     kernarg_segment_byte_size = 12
2752; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
2753; GPRIDX-NEXT:     wavefront_sgpr_count = 9
2754; GPRIDX-NEXT:     workitem_vgpr_count = 3
2755; GPRIDX-NEXT:     reserved_vgpr_first = 0
2756; GPRIDX-NEXT:     reserved_vgpr_count = 0
2757; GPRIDX-NEXT:     reserved_sgpr_first = 0
2758; GPRIDX-NEXT:     reserved_sgpr_count = 0
2759; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
2760; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
2761; GPRIDX-NEXT:     kernarg_segment_alignment = 4
2762; GPRIDX-NEXT:     group_segment_alignment = 4
2763; GPRIDX-NEXT:     private_segment_alignment = 4
2764; GPRIDX-NEXT:     wavefront_size = 6
2765; GPRIDX-NEXT:     call_convention = -1
2766; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
2767; GPRIDX-NEXT:    .end_amd_kernel_code_t
2768; GPRIDX-NEXT:  ; %bb.0: ; %entry
2769; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2770; GPRIDX-NEXT:    s_load_dword s8, s[4:5], 0x8
2771; GPRIDX-NEXT:    s_mov_b32 s2, 0
2772; GPRIDX-NEXT:    s_mov_b32 s3, 0x40140000
2773; GPRIDX-NEXT:    s_mov_b32 s5, 0x40080000
2774; GPRIDX-NEXT:    s_mov_b32 s4, s2
2775; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
2776; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 1
2777; GPRIDX-NEXT:    s_cselect_b64 s[6:7], 2.0, 1.0
2778; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 2
2779; GPRIDX-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
2780; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 3
2781; GPRIDX-NEXT:    s_cselect_b64 s[4:5], 4.0, s[4:5]
2782; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 4
2783; GPRIDX-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
2784; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
2785; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
2786; GPRIDX-NEXT:    v_mov_b32_e32 v2, 0
2787; GPRIDX-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
2788; GPRIDX-NEXT:    s_endpgm
2789;
2790; MOVREL-LABEL: dyn_extract_v5f64_s_s:
2791; MOVREL:         .amd_kernel_code_t
2792; MOVREL-NEXT:     amd_code_version_major = 1
2793; MOVREL-NEXT:     amd_code_version_minor = 2
2794; MOVREL-NEXT:     amd_machine_kind = 1
2795; MOVREL-NEXT:     amd_machine_version_major = 8
2796; MOVREL-NEXT:     amd_machine_version_minor = 0
2797; MOVREL-NEXT:     amd_machine_version_stepping = 3
2798; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
2799; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
2800; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
2801; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 1
2802; MOVREL-NEXT:     priority = 0
2803; MOVREL-NEXT:     float_mode = 240
2804; MOVREL-NEXT:     priv = 0
2805; MOVREL-NEXT:     enable_dx10_clamp = 1
2806; MOVREL-NEXT:     debug_mode = 0
2807; MOVREL-NEXT:     enable_ieee_mode = 1
2808; MOVREL-NEXT:     enable_wgp_mode = 0
2809; MOVREL-NEXT:     enable_mem_ordered = 0
2810; MOVREL-NEXT:     enable_fwd_progress = 0
2811; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
2812; MOVREL-NEXT:     user_sgpr_count = 6
2813; MOVREL-NEXT:     enable_trap_handler = 0
2814; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
2815; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 0
2816; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 0
2817; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
2818; MOVREL-NEXT:     enable_vgpr_workitem_id = 0
2819; MOVREL-NEXT:     enable_exception_msb = 0
2820; MOVREL-NEXT:     granulated_lds_size = 0
2821; MOVREL-NEXT:     enable_exception = 0
2822; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
2823; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 0
2824; MOVREL-NEXT:     enable_sgpr_queue_ptr = 0
2825; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
2826; MOVREL-NEXT:     enable_sgpr_dispatch_id = 0
2827; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
2828; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
2829; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
2830; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
2831; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
2832; MOVREL-NEXT:     enable_wavefront_size32 = 0
2833; MOVREL-NEXT:     enable_ordered_append_gds = 0
2834; MOVREL-NEXT:     private_element_size = 1
2835; MOVREL-NEXT:     is_ptr64 = 1
2836; MOVREL-NEXT:     is_dynamic_callstack = 0
2837; MOVREL-NEXT:     is_debug_enabled = 0
2838; MOVREL-NEXT:     is_xnack_enabled = 0
2839; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
2840; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
2841; MOVREL-NEXT:     gds_segment_byte_size = 0
2842; MOVREL-NEXT:     kernarg_segment_byte_size = 12
2843; MOVREL-NEXT:     workgroup_fbarrier_count = 0
2844; MOVREL-NEXT:     wavefront_sgpr_count = 9
2845; MOVREL-NEXT:     workitem_vgpr_count = 4
2846; MOVREL-NEXT:     reserved_vgpr_first = 0
2847; MOVREL-NEXT:     reserved_vgpr_count = 0
2848; MOVREL-NEXT:     reserved_sgpr_first = 0
2849; MOVREL-NEXT:     reserved_sgpr_count = 0
2850; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
2851; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
2852; MOVREL-NEXT:     kernarg_segment_alignment = 4
2853; MOVREL-NEXT:     group_segment_alignment = 4
2854; MOVREL-NEXT:     private_segment_alignment = 4
2855; MOVREL-NEXT:     wavefront_size = 6
2856; MOVREL-NEXT:     call_convention = -1
2857; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
2858; MOVREL-NEXT:    .end_amd_kernel_code_t
2859; MOVREL-NEXT:  ; %bb.0: ; %entry
2860; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2861; MOVREL-NEXT:    s_load_dword s8, s[4:5], 0x8
2862; MOVREL-NEXT:    s_mov_b32 s2, 0
2863; MOVREL-NEXT:    s_mov_b32 s3, 0x40140000
2864; MOVREL-NEXT:    s_mov_b32 s5, 0x40080000
2865; MOVREL-NEXT:    s_mov_b32 s4, s2
2866; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
2867; MOVREL-NEXT:    s_cmp_eq_u32 s8, 1
2868; MOVREL-NEXT:    s_cselect_b64 s[6:7], 2.0, 1.0
2869; MOVREL-NEXT:    s_cmp_eq_u32 s8, 2
2870; MOVREL-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
2871; MOVREL-NEXT:    s_cmp_eq_u32 s8, 3
2872; MOVREL-NEXT:    s_cselect_b64 s[4:5], 4.0, s[4:5]
2873; MOVREL-NEXT:    s_cmp_eq_u32 s8, 4
2874; MOVREL-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
2875; MOVREL-NEXT:    v_mov_b32_e32 v0, s2
2876; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
2877; MOVREL-NEXT:    v_mov_b32_e32 v1, s3
2878; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
2879; MOVREL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
2880; MOVREL-NEXT:    s_endpgm
2881;
2882; GFX10-LABEL: dyn_extract_v5f64_s_s:
2883; GFX10:         .amd_kernel_code_t
2884; GFX10-NEXT:     amd_code_version_major = 1
2885; GFX10-NEXT:     amd_code_version_minor = 2
2886; GFX10-NEXT:     amd_machine_kind = 1
2887; GFX10-NEXT:     amd_machine_version_major = 10
2888; GFX10-NEXT:     amd_machine_version_minor = 1
2889; GFX10-NEXT:     amd_machine_version_stepping = 0
2890; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
2891; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
2892; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
2893; GFX10-NEXT:     granulated_wavefront_sgpr_count = 1
2894; GFX10-NEXT:     priority = 0
2895; GFX10-NEXT:     float_mode = 240
2896; GFX10-NEXT:     priv = 0
2897; GFX10-NEXT:     enable_dx10_clamp = 1
2898; GFX10-NEXT:     debug_mode = 0
2899; GFX10-NEXT:     enable_ieee_mode = 1
2900; GFX10-NEXT:     enable_wgp_mode = 1
2901; GFX10-NEXT:     enable_mem_ordered = 1
2902; GFX10-NEXT:     enable_fwd_progress = 0
2903; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
2904; GFX10-NEXT:     user_sgpr_count = 6
2905; GFX10-NEXT:     enable_trap_handler = 0
2906; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
2907; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 0
2908; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 0
2909; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
2910; GFX10-NEXT:     enable_vgpr_workitem_id = 0
2911; GFX10-NEXT:     enable_exception_msb = 0
2912; GFX10-NEXT:     granulated_lds_size = 0
2913; GFX10-NEXT:     enable_exception = 0
2914; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
2915; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 0
2916; GFX10-NEXT:     enable_sgpr_queue_ptr = 0
2917; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
2918; GFX10-NEXT:     enable_sgpr_dispatch_id = 0
2919; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
2920; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
2921; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
2922; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
2923; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
2924; GFX10-NEXT:     enable_wavefront_size32 = 1
2925; GFX10-NEXT:     enable_ordered_append_gds = 0
2926; GFX10-NEXT:     private_element_size = 1
2927; GFX10-NEXT:     is_ptr64 = 1
2928; GFX10-NEXT:     is_dynamic_callstack = 0
2929; GFX10-NEXT:     is_debug_enabled = 0
2930; GFX10-NEXT:     is_xnack_enabled = 1
2931; GFX10-NEXT:     workitem_private_segment_byte_size = 0
2932; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
2933; GFX10-NEXT:     gds_segment_byte_size = 0
2934; GFX10-NEXT:     kernarg_segment_byte_size = 12
2935; GFX10-NEXT:     workgroup_fbarrier_count = 0
2936; GFX10-NEXT:     wavefront_sgpr_count = 9
2937; GFX10-NEXT:     workitem_vgpr_count = 3
2938; GFX10-NEXT:     reserved_vgpr_first = 0
2939; GFX10-NEXT:     reserved_vgpr_count = 0
2940; GFX10-NEXT:     reserved_sgpr_first = 0
2941; GFX10-NEXT:     reserved_sgpr_count = 0
2942; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
2943; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
2944; GFX10-NEXT:     kernarg_segment_alignment = 4
2945; GFX10-NEXT:     group_segment_alignment = 4
2946; GFX10-NEXT:     private_segment_alignment = 4
2947; GFX10-NEXT:     wavefront_size = 5
2948; GFX10-NEXT:     call_convention = -1
2949; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
2950; GFX10-NEXT:    .end_amd_kernel_code_t
2951; GFX10-NEXT:  ; %bb.0: ; %entry
2952; GFX10-NEXT:    s_clause 0x1
2953; GFX10-NEXT:    s_load_dword s8, s[4:5], 0x8
2954; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2955; GFX10-NEXT:    s_mov_b32 s2, 0
2956; GFX10-NEXT:    s_mov_b32 s3, 0x40140000
2957; GFX10-NEXT:    s_mov_b32 s5, 0x40080000
2958; GFX10-NEXT:    s_mov_b32 s4, s2
2959; GFX10-NEXT:    v_mov_b32_e32 v2, 0
2960; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2961; GFX10-NEXT:    s_cmp_eq_u32 s8, 1
2962; GFX10-NEXT:    s_cselect_b64 s[6:7], 2.0, 1.0
2963; GFX10-NEXT:    s_cmp_eq_u32 s8, 2
2964; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
2965; GFX10-NEXT:    s_cmp_eq_u32 s8, 3
2966; GFX10-NEXT:    s_cselect_b64 s[4:5], 4.0, s[4:5]
2967; GFX10-NEXT:    s_cmp_eq_u32 s8, 4
2968; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
2969; GFX10-NEXT:    v_mov_b32_e32 v0, s2
2970; GFX10-NEXT:    v_mov_b32_e32 v1, s3
2971; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
2972; GFX10-NEXT:    s_endpgm
2973entry:
2974  %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, i32 %sel
2975  store double %ext, double addrspace(1)* %out
2976  ret void
2977}
2978
2979define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
2980; GCN-LABEL: dyn_extract_v15f32_const_s_v:
2981; GCN:       ; %bb.0: ; %entry
2982; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2983; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2984; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
2985; GCN-NEXT:    v_cndmask_b32_e64 v13, 1.0, 2.0, vcc
2986; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2987; GCN-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
2988; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2989; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
2990; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
2991; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2992; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
2993; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2994; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2995; GCN-NEXT:    v_mov_b32_e32 v4, 0x40e00000
2996; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2997; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2998; GCN-NEXT:    v_mov_b32_e32 v5, 0x41000000
2999; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
3000; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
3001; GCN-NEXT:    v_mov_b32_e32 v6, 0x41100000
3002; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
3003; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v0
3004; GCN-NEXT:    v_mov_b32_e32 v7, 0x41200000
3005; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
3006; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v0
3007; GCN-NEXT:    v_mov_b32_e32 v8, 0x41300000
3008; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
3009; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v0
3010; GCN-NEXT:    v_mov_b32_e32 v9, 0x41400000
3011; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
3012; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v0
3013; GCN-NEXT:    v_mov_b32_e32 v10, 0x41500000
3014; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
3015; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v0
3016; GCN-NEXT:    v_mov_b32_e32 v11, 0x41600000
3017; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
3018; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v0
3019; GCN-NEXT:    v_mov_b32_e32 v12, 0x41700000
3020; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
3021; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v0
3022; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v12, vcc
3023; GCN-NEXT:    s_setpc_b64 s[30:31]
3024;
3025; GFX10-LABEL: dyn_extract_v15f32_const_s_v:
3026; GFX10:       ; %bb.0: ; %entry
3027; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3028; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3029; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3030; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3031; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
3032; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3033; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
3034; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3035; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
3036; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3037; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
3038; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3039; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
3040; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3041; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
3042; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3043; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
3044; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3045; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
3046; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3047; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
3048; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3049; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
3050; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3051; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
3052; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3053; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
3054; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3055; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
3056; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, 0x41700000, vcc_lo
3057; GFX10-NEXT:    s_setpc_b64 s[30:31]
3058entry:
3059  %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
3060  ret float %ext
3061}
3062
3063define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) {
3064; GCN-LABEL: dyn_extract_v15f32_const_s_s:
3065; GCN:       ; %bb.0: ; %entry
3066; GCN-NEXT:    s_mov_b32 s4, 1.0
3067; GCN-NEXT:    s_mov_b32 m0, s2
3068; GCN-NEXT:    s_mov_b32 s18, 0x41700000
3069; GCN-NEXT:    s_mov_b32 s17, 0x41600000
3070; GCN-NEXT:    s_mov_b32 s16, 0x41500000
3071; GCN-NEXT:    s_mov_b32 s15, 0x41400000
3072; GCN-NEXT:    s_mov_b32 s14, 0x41300000
3073; GCN-NEXT:    s_mov_b32 s13, 0x41200000
3074; GCN-NEXT:    s_mov_b32 s12, 0x41100000
3075; GCN-NEXT:    s_mov_b32 s11, 0x41000000
3076; GCN-NEXT:    s_mov_b32 s10, 0x40e00000
3077; GCN-NEXT:    s_mov_b32 s9, 0x40c00000
3078; GCN-NEXT:    s_mov_b32 s8, 0x40a00000
3079; GCN-NEXT:    s_mov_b32 s7, 4.0
3080; GCN-NEXT:    s_mov_b32 s6, 0x40400000
3081; GCN-NEXT:    s_mov_b32 s5, 2.0
3082; GCN-NEXT:    s_movrels_b32 s0, s4
3083; GCN-NEXT:    v_mov_b32_e32 v0, s0
3084; GCN-NEXT:    ; return to shader part epilog
3085;
3086; GFX10-LABEL: dyn_extract_v15f32_const_s_s:
3087; GFX10:       ; %bb.0: ; %entry
3088; GFX10-NEXT:    s_mov_b32 s4, 1.0
3089; GFX10-NEXT:    s_mov_b32 m0, s2
3090; GFX10-NEXT:    s_mov_b32 s18, 0x41700000
3091; GFX10-NEXT:    s_mov_b32 s17, 0x41600000
3092; GFX10-NEXT:    s_mov_b32 s16, 0x41500000
3093; GFX10-NEXT:    s_mov_b32 s15, 0x41400000
3094; GFX10-NEXT:    s_mov_b32 s14, 0x41300000
3095; GFX10-NEXT:    s_mov_b32 s13, 0x41200000
3096; GFX10-NEXT:    s_mov_b32 s12, 0x41100000
3097; GFX10-NEXT:    s_mov_b32 s11, 0x41000000
3098; GFX10-NEXT:    s_mov_b32 s10, 0x40e00000
3099; GFX10-NEXT:    s_mov_b32 s9, 0x40c00000
3100; GFX10-NEXT:    s_mov_b32 s8, 0x40a00000
3101; GFX10-NEXT:    s_mov_b32 s7, 4.0
3102; GFX10-NEXT:    s_mov_b32 s6, 0x40400000
3103; GFX10-NEXT:    s_mov_b32 s5, 2.0
3104; GFX10-NEXT:    s_movrels_b32 s0, s4
3105; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3106; GFX10-NEXT:    ; return to shader part epilog
3107entry:
3108  %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
3109  ret float %ext
3110}
3111
3112define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) {
3113; GCN-LABEL: dyn_extract_v15f32_s_v:
3114; GCN:       ; %bb.0: ; %entry
3115; GCN-NEXT:    s_mov_b32 s0, s2
3116; GCN-NEXT:    s_mov_b32 s1, s3
3117; GCN-NEXT:    s_mov_b32 s2, s4
3118; GCN-NEXT:    v_mov_b32_e32 v1, s0
3119; GCN-NEXT:    v_mov_b32_e32 v2, s1
3120; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3121; GCN-NEXT:    s_mov_b32 s3, s5
3122; GCN-NEXT:    v_mov_b32_e32 v3, s2
3123; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3124; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
3125; GCN-NEXT:    s_mov_b32 s4, s6
3126; GCN-NEXT:    v_mov_b32_e32 v4, s3
3127; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
3128; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
3129; GCN-NEXT:    s_mov_b32 s5, s7
3130; GCN-NEXT:    v_mov_b32_e32 v5, s4
3131; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
3132; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
3133; GCN-NEXT:    s_mov_b32 s6, s8
3134; GCN-NEXT:    v_mov_b32_e32 v6, s5
3135; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
3136; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
3137; GCN-NEXT:    s_mov_b32 s7, s9
3138; GCN-NEXT:    v_mov_b32_e32 v7, s6
3139; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
3140; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
3141; GCN-NEXT:    s_mov_b32 s8, s10
3142; GCN-NEXT:    v_mov_b32_e32 v8, s7
3143; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
3144; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
3145; GCN-NEXT:    s_mov_b32 s9, s11
3146; GCN-NEXT:    v_mov_b32_e32 v9, s8
3147; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
3148; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v0
3149; GCN-NEXT:    s_mov_b32 s10, s12
3150; GCN-NEXT:    v_mov_b32_e32 v10, s9
3151; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
3152; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v0
3153; GCN-NEXT:    v_mov_b32_e32 v11, s10
3154; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
3155; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v0
3156; GCN-NEXT:    v_mov_b32_e32 v12, s13
3157; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
3158; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v0
3159; GCN-NEXT:    v_mov_b32_e32 v13, s14
3160; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
3161; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v0
3162; GCN-NEXT:    v_mov_b32_e32 v14, s15
3163; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
3164; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v0
3165; GCN-NEXT:    v_mov_b32_e32 v15, s16
3166; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
3167; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v0
3168; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
3169; GCN-NEXT:    ; return to shader part epilog
3170;
3171; GFX10-LABEL: dyn_extract_v15f32_s_v:
3172; GFX10:       ; %bb.0: ; %entry
3173; GFX10-NEXT:    s_mov_b32 s1, s3
3174; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3175; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3176; GFX10-NEXT:    s_mov_b32 s0, s2
3177; GFX10-NEXT:    s_mov_b32 s2, s4
3178; GFX10-NEXT:    s_mov_b32 s3, s5
3179; GFX10-NEXT:    s_mov_b32 s4, s6
3180; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
3181; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
3182; GFX10-NEXT:    s_mov_b32 s5, s7
3183; GFX10-NEXT:    s_mov_b32 s6, s8
3184; GFX10-NEXT:    s_mov_b32 s7, s9
3185; GFX10-NEXT:    s_mov_b32 s8, s10
3186; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
3187; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
3188; GFX10-NEXT:    s_mov_b32 s9, s11
3189; GFX10-NEXT:    s_mov_b32 s10, s12
3190; GFX10-NEXT:    s_mov_b32 s11, s13
3191; GFX10-NEXT:    s_mov_b32 s12, s14
3192; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
3193; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
3194; GFX10-NEXT:    s_mov_b32 s13, s15
3195; GFX10-NEXT:    s_mov_b32 s14, s16
3196; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
3197; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
3198; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
3199; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
3200; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
3201; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
3202; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s7, vcc_lo
3203; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
3204; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
3205; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
3206; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s9, vcc_lo
3207; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
3208; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
3209; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
3210; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s11, vcc_lo
3211; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
3212; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
3213; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
3214; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s13, vcc_lo
3215; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
3216; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s14, vcc_lo
3217; GFX10-NEXT:    ; return to shader part epilog
3218entry:
3219  %ext = extractelement <15 x float> %vec, i32 %sel
3220  ret float %ext
3221}
3222
3223define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) {
3224; GCN-LABEL: dyn_extract_v15f32_v_v:
3225; GCN:       ; %bb.0: ; %entry
3226; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3227; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3228; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3229; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3230; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3231; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3232; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3233; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3234; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3235; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3236; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3237; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3238; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3239; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3240; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3241; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3242; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3243; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3244; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3245; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3246; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3247; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3248; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3249; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3250; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3251; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3252; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3253; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3254; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3255; GCN-NEXT:    s_setpc_b64 s[30:31]
3256;
3257; GFX10-LABEL: dyn_extract_v15f32_v_v:
3258; GFX10:       ; %bb.0: ; %entry
3259; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3260; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3261; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
3262; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3263; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v15
3264; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3265; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
3266; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3267; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v15
3268; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3269; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v15
3270; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3271; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v15
3272; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3273; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v15
3274; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3275; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v15
3276; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3277; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v15
3278; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3279; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v15
3280; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3281; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v15
3282; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3283; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v15
3284; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3285; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v15
3286; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3287; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v15
3288; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3289; GFX10-NEXT:    s_setpc_b64 s[30:31]
3290entry:
3291  %ext = extractelement <15 x float> %vec, i32 %sel
3292  ret float %ext
3293}
3294
3295define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) {
3296; GPRIDX-LABEL: dyn_extract_v15f32_v_s:
3297; GPRIDX:       ; %bb.0: ; %entry
3298; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
3299; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
3300; GPRIDX-NEXT:    s_set_gpr_idx_off
3301; GPRIDX-NEXT:    ; return to shader part epilog
3302;
3303; MOVREL-LABEL: dyn_extract_v15f32_v_s:
3304; MOVREL:       ; %bb.0: ; %entry
3305; MOVREL-NEXT:    s_mov_b32 m0, s2
3306; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
3307; MOVREL-NEXT:    ; return to shader part epilog
3308;
3309; GFX10-LABEL: dyn_extract_v15f32_v_s:
3310; GFX10:       ; %bb.0: ; %entry
3311; GFX10-NEXT:    s_mov_b32 m0, s2
3312; GFX10-NEXT:    v_movrels_b32_e32 v0, v0
3313; GFX10-NEXT:    ; return to shader part epilog
3314entry:
3315  %ext = extractelement <15 x float> %vec, i32 %sel
3316  ret float %ext
3317}
3318
3319define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) {
3320; GCN-LABEL: dyn_extract_v15f32_s_s:
3321; GCN:       ; %bb.0: ; %entry
3322; GCN-NEXT:    s_mov_b32 s0, s2
3323; GCN-NEXT:    s_mov_b32 m0, s17
3324; GCN-NEXT:    s_mov_b32 s1, s3
3325; GCN-NEXT:    s_mov_b32 s2, s4
3326; GCN-NEXT:    s_mov_b32 s3, s5
3327; GCN-NEXT:    s_mov_b32 s4, s6
3328; GCN-NEXT:    s_mov_b32 s5, s7
3329; GCN-NEXT:    s_mov_b32 s6, s8
3330; GCN-NEXT:    s_mov_b32 s7, s9
3331; GCN-NEXT:    s_mov_b32 s8, s10
3332; GCN-NEXT:    s_mov_b32 s9, s11
3333; GCN-NEXT:    s_mov_b32 s10, s12
3334; GCN-NEXT:    s_mov_b32 s11, s13
3335; GCN-NEXT:    s_mov_b32 s12, s14
3336; GCN-NEXT:    s_mov_b32 s13, s15
3337; GCN-NEXT:    s_mov_b32 s14, s16
3338; GCN-NEXT:    s_movrels_b32 s0, s0
3339; GCN-NEXT:    v_mov_b32_e32 v0, s0
3340; GCN-NEXT:    ; return to shader part epilog
3341;
3342; GFX10-LABEL: dyn_extract_v15f32_s_s:
3343; GFX10:       ; %bb.0: ; %entry
3344; GFX10-NEXT:    s_mov_b32 s0, s2
3345; GFX10-NEXT:    s_mov_b32 m0, s17
3346; GFX10-NEXT:    s_mov_b32 s1, s3
3347; GFX10-NEXT:    s_mov_b32 s2, s4
3348; GFX10-NEXT:    s_mov_b32 s3, s5
3349; GFX10-NEXT:    s_mov_b32 s4, s6
3350; GFX10-NEXT:    s_mov_b32 s5, s7
3351; GFX10-NEXT:    s_mov_b32 s6, s8
3352; GFX10-NEXT:    s_mov_b32 s7, s9
3353; GFX10-NEXT:    s_mov_b32 s8, s10
3354; GFX10-NEXT:    s_mov_b32 s9, s11
3355; GFX10-NEXT:    s_mov_b32 s10, s12
3356; GFX10-NEXT:    s_mov_b32 s11, s13
3357; GFX10-NEXT:    s_mov_b32 s12, s14
3358; GFX10-NEXT:    s_mov_b32 s13, s15
3359; GFX10-NEXT:    s_mov_b32 s14, s16
3360; GFX10-NEXT:    s_movrels_b32 s0, s0
3361; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3362; GFX10-NEXT:    ; return to shader part epilog
3363entry:
3364  %ext = extractelement <15 x float> %vec, i32 %sel
3365  ret float %ext
3366}
3367
3368define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) {
3369; GCN-LABEL: dyn_extract_v15f32_s_s_offset3:
3370; GCN:       ; %bb.0: ; %entry
3371; GCN-NEXT:    s_mov_b32 s0, s2
3372; GCN-NEXT:    s_mov_b32 s1, s3
3373; GCN-NEXT:    s_mov_b32 s3, s5
3374; GCN-NEXT:    s_mov_b32 m0, s17
3375; GCN-NEXT:    s_mov_b32 s2, s4
3376; GCN-NEXT:    s_mov_b32 s4, s6
3377; GCN-NEXT:    s_mov_b32 s5, s7
3378; GCN-NEXT:    s_mov_b32 s6, s8
3379; GCN-NEXT:    s_mov_b32 s7, s9
3380; GCN-NEXT:    s_mov_b32 s8, s10
3381; GCN-NEXT:    s_mov_b32 s9, s11
3382; GCN-NEXT:    s_mov_b32 s10, s12
3383; GCN-NEXT:    s_mov_b32 s11, s13
3384; GCN-NEXT:    s_mov_b32 s12, s14
3385; GCN-NEXT:    s_mov_b32 s13, s15
3386; GCN-NEXT:    s_mov_b32 s14, s16
3387; GCN-NEXT:    s_movrels_b32 s0, s3
3388; GCN-NEXT:    v_mov_b32_e32 v0, s0
3389; GCN-NEXT:    ; return to shader part epilog
3390;
3391; GFX10-LABEL: dyn_extract_v15f32_s_s_offset3:
3392; GFX10:       ; %bb.0: ; %entry
3393; GFX10-NEXT:    s_mov_b32 s1, s3
3394; GFX10-NEXT:    s_mov_b32 s3, s5
3395; GFX10-NEXT:    s_mov_b32 m0, s17
3396; GFX10-NEXT:    s_mov_b32 s0, s2
3397; GFX10-NEXT:    s_mov_b32 s2, s4
3398; GFX10-NEXT:    s_mov_b32 s4, s6
3399; GFX10-NEXT:    s_mov_b32 s5, s7
3400; GFX10-NEXT:    s_mov_b32 s6, s8
3401; GFX10-NEXT:    s_mov_b32 s7, s9
3402; GFX10-NEXT:    s_mov_b32 s8, s10
3403; GFX10-NEXT:    s_mov_b32 s9, s11
3404; GFX10-NEXT:    s_mov_b32 s10, s12
3405; GFX10-NEXT:    s_mov_b32 s11, s13
3406; GFX10-NEXT:    s_mov_b32 s12, s14
3407; GFX10-NEXT:    s_mov_b32 s13, s15
3408; GFX10-NEXT:    s_mov_b32 s14, s16
3409; GFX10-NEXT:    s_movrels_b32 s0, s3
3410; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3411; GFX10-NEXT:    ; return to shader part epilog
3412entry:
3413  %add = add i32 %sel, 3
3414  %ext = extractelement <15 x float> %vec, i32 %add
3415  ret float %ext
3416}
3417
3418define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) {
3419; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3:
3420; GPRIDX:       ; %bb.0: ; %entry
3421; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3422; GPRIDX-NEXT:    v_add_u32_e32 v15, 3, v15
3423; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3424; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3425; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3426; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3427; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3428; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3429; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3430; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3431; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3432; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3433; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3434; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3435; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3436; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3437; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3438; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3439; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3440; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3441; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3442; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3443; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3444; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3445; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3446; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3447; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3448; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3449; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3450; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3451; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
3452;
3453; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3:
3454; MOVREL:       ; %bb.0: ; %entry
3455; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3456; MOVREL-NEXT:    v_add_u32_e32 v15, vcc, 3, v15
3457; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3458; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3459; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3460; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3461; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3462; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3463; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3464; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3465; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3466; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3467; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3468; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3469; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3470; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3471; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3472; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3473; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3474; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3475; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3476; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3477; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3478; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3479; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3480; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3481; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3482; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3483; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3484; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3485; MOVREL-NEXT:    s_setpc_b64 s[30:31]
3486;
3487; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3:
3488; GFX10:       ; %bb.0: ; %entry
3489; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3490; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3491; GFX10-NEXT:    v_add_nc_u32_e32 v15, 3, v15
3492; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
3493; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3494; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v15
3495; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3496; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
3497; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3498; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v15
3499; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3500; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v15
3501; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3502; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v15
3503; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3504; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v15
3505; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3506; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v15
3507; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3508; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v15
3509; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3510; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v15
3511; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3512; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v15
3513; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3514; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v15
3515; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3516; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v15
3517; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3518; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v15
3519; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3520; GFX10-NEXT:    s_setpc_b64 s[30:31]
3521entry:
3522  %add = add i32 %sel, 3
3523  %ext = extractelement <15 x float> %vec, i32 %add
3524  ret float %ext
3525}
3526
3527define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 %sel) {
3528; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s:
3529; GPRIDX:         .amd_kernel_code_t
3530; GPRIDX-NEXT:     amd_code_version_major = 1
3531; GPRIDX-NEXT:     amd_code_version_minor = 2
3532; GPRIDX-NEXT:     amd_machine_kind = 1
3533; GPRIDX-NEXT:     amd_machine_version_major = 9
3534; GPRIDX-NEXT:     amd_machine_version_minor = 0
3535; GPRIDX-NEXT:     amd_machine_version_stepping = 0
3536; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
3537; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
3538; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
3539; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 0
3540; GPRIDX-NEXT:     priority = 0
3541; GPRIDX-NEXT:     float_mode = 240
3542; GPRIDX-NEXT:     priv = 0
3543; GPRIDX-NEXT:     enable_dx10_clamp = 1
3544; GPRIDX-NEXT:     debug_mode = 0
3545; GPRIDX-NEXT:     enable_ieee_mode = 1
3546; GPRIDX-NEXT:     enable_wgp_mode = 0
3547; GPRIDX-NEXT:     enable_mem_ordered = 0
3548; GPRIDX-NEXT:     enable_fwd_progress = 0
3549; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3550; GPRIDX-NEXT:     user_sgpr_count = 6
3551; GPRIDX-NEXT:     enable_trap_handler = 0
3552; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
3553; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 0
3554; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 0
3555; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
3556; GPRIDX-NEXT:     enable_vgpr_workitem_id = 0
3557; GPRIDX-NEXT:     enable_exception_msb = 0
3558; GPRIDX-NEXT:     granulated_lds_size = 0
3559; GPRIDX-NEXT:     enable_exception = 0
3560; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
3561; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 0
3562; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 0
3563; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3564; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 0
3565; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
3566; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
3567; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3568; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3569; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3570; GPRIDX-NEXT:     enable_wavefront_size32 = 0
3571; GPRIDX-NEXT:     enable_ordered_append_gds = 0
3572; GPRIDX-NEXT:     private_element_size = 1
3573; GPRIDX-NEXT:     is_ptr64 = 1
3574; GPRIDX-NEXT:     is_dynamic_callstack = 0
3575; GPRIDX-NEXT:     is_debug_enabled = 0
3576; GPRIDX-NEXT:     is_xnack_enabled = 1
3577; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
3578; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
3579; GPRIDX-NEXT:     gds_segment_byte_size = 0
3580; GPRIDX-NEXT:     kernarg_segment_byte_size = 12
3581; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
3582; GPRIDX-NEXT:     wavefront_sgpr_count = 6
3583; GPRIDX-NEXT:     workitem_vgpr_count = 2
3584; GPRIDX-NEXT:     reserved_vgpr_first = 0
3585; GPRIDX-NEXT:     reserved_vgpr_count = 0
3586; GPRIDX-NEXT:     reserved_sgpr_first = 0
3587; GPRIDX-NEXT:     reserved_sgpr_count = 0
3588; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3589; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
3590; GPRIDX-NEXT:     kernarg_segment_alignment = 4
3591; GPRIDX-NEXT:     group_segment_alignment = 4
3592; GPRIDX-NEXT:     private_segment_alignment = 4
3593; GPRIDX-NEXT:     wavefront_size = 6
3594; GPRIDX-NEXT:     call_convention = -1
3595; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
3596; GPRIDX-NEXT:    .end_amd_kernel_code_t
3597; GPRIDX-NEXT:  ; %bb.0: ; %entry
3598; GPRIDX-NEXT:    s_load_dword s2, s[4:5], 0x8
3599; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3600; GPRIDX-NEXT:    v_mov_b32_e32 v1, 0
3601; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
3602; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 1
3603; GPRIDX-NEXT:    s_cselect_b32 s3, 2.0, 1.0
3604; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 2
3605; GPRIDX-NEXT:    s_cselect_b32 s3, 0x40400000, s3
3606; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 3
3607; GPRIDX-NEXT:    s_cselect_b32 s2, 4.0, s3
3608; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
3609; GPRIDX-NEXT:    global_store_dword v1, v0, s[0:1]
3610; GPRIDX-NEXT:    s_endpgm
3611;
3612; MOVREL-LABEL: dyn_extract_v4f32_s_s_s:
3613; MOVREL:         .amd_kernel_code_t
3614; MOVREL-NEXT:     amd_code_version_major = 1
3615; MOVREL-NEXT:     amd_code_version_minor = 2
3616; MOVREL-NEXT:     amd_machine_kind = 1
3617; MOVREL-NEXT:     amd_machine_version_major = 8
3618; MOVREL-NEXT:     amd_machine_version_minor = 0
3619; MOVREL-NEXT:     amd_machine_version_stepping = 3
3620; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
3621; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
3622; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
3623; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 0
3624; MOVREL-NEXT:     priority = 0
3625; MOVREL-NEXT:     float_mode = 240
3626; MOVREL-NEXT:     priv = 0
3627; MOVREL-NEXT:     enable_dx10_clamp = 1
3628; MOVREL-NEXT:     debug_mode = 0
3629; MOVREL-NEXT:     enable_ieee_mode = 1
3630; MOVREL-NEXT:     enable_wgp_mode = 0
3631; MOVREL-NEXT:     enable_mem_ordered = 0
3632; MOVREL-NEXT:     enable_fwd_progress = 0
3633; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3634; MOVREL-NEXT:     user_sgpr_count = 6
3635; MOVREL-NEXT:     enable_trap_handler = 0
3636; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
3637; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 0
3638; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 0
3639; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
3640; MOVREL-NEXT:     enable_vgpr_workitem_id = 0
3641; MOVREL-NEXT:     enable_exception_msb = 0
3642; MOVREL-NEXT:     granulated_lds_size = 0
3643; MOVREL-NEXT:     enable_exception = 0
3644; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
3645; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 0
3646; MOVREL-NEXT:     enable_sgpr_queue_ptr = 0
3647; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3648; MOVREL-NEXT:     enable_sgpr_dispatch_id = 0
3649; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
3650; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
3651; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3652; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3653; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3654; MOVREL-NEXT:     enable_wavefront_size32 = 0
3655; MOVREL-NEXT:     enable_ordered_append_gds = 0
3656; MOVREL-NEXT:     private_element_size = 1
3657; MOVREL-NEXT:     is_ptr64 = 1
3658; MOVREL-NEXT:     is_dynamic_callstack = 0
3659; MOVREL-NEXT:     is_debug_enabled = 0
3660; MOVREL-NEXT:     is_xnack_enabled = 0
3661; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
3662; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
3663; MOVREL-NEXT:     gds_segment_byte_size = 0
3664; MOVREL-NEXT:     kernarg_segment_byte_size = 12
3665; MOVREL-NEXT:     workgroup_fbarrier_count = 0
3666; MOVREL-NEXT:     wavefront_sgpr_count = 6
3667; MOVREL-NEXT:     workitem_vgpr_count = 3
3668; MOVREL-NEXT:     reserved_vgpr_first = 0
3669; MOVREL-NEXT:     reserved_vgpr_count = 0
3670; MOVREL-NEXT:     reserved_sgpr_first = 0
3671; MOVREL-NEXT:     reserved_sgpr_count = 0
3672; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3673; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
3674; MOVREL-NEXT:     kernarg_segment_alignment = 4
3675; MOVREL-NEXT:     group_segment_alignment = 4
3676; MOVREL-NEXT:     private_segment_alignment = 4
3677; MOVREL-NEXT:     wavefront_size = 6
3678; MOVREL-NEXT:     call_convention = -1
3679; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
3680; MOVREL-NEXT:    .end_amd_kernel_code_t
3681; MOVREL-NEXT:  ; %bb.0: ; %entry
3682; MOVREL-NEXT:    s_load_dword s2, s[4:5], 0x8
3683; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3684; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
3685; MOVREL-NEXT:    s_cmp_eq_u32 s2, 1
3686; MOVREL-NEXT:    s_cselect_b32 s3, 2.0, 1.0
3687; MOVREL-NEXT:    s_cmp_eq_u32 s2, 2
3688; MOVREL-NEXT:    s_cselect_b32 s3, 0x40400000, s3
3689; MOVREL-NEXT:    s_cmp_eq_u32 s2, 3
3690; MOVREL-NEXT:    s_cselect_b32 s2, 4.0, s3
3691; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
3692; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
3693; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
3694; MOVREL-NEXT:    flat_store_dword v[0:1], v2
3695; MOVREL-NEXT:    s_endpgm
3696;
3697; GFX10-LABEL: dyn_extract_v4f32_s_s_s:
3698; GFX10:         .amd_kernel_code_t
3699; GFX10-NEXT:     amd_code_version_major = 1
3700; GFX10-NEXT:     amd_code_version_minor = 2
3701; GFX10-NEXT:     amd_machine_kind = 1
3702; GFX10-NEXT:     amd_machine_version_major = 10
3703; GFX10-NEXT:     amd_machine_version_minor = 1
3704; GFX10-NEXT:     amd_machine_version_stepping = 0
3705; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
3706; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
3707; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
3708; GFX10-NEXT:     granulated_wavefront_sgpr_count = 0
3709; GFX10-NEXT:     priority = 0
3710; GFX10-NEXT:     float_mode = 240
3711; GFX10-NEXT:     priv = 0
3712; GFX10-NEXT:     enable_dx10_clamp = 1
3713; GFX10-NEXT:     debug_mode = 0
3714; GFX10-NEXT:     enable_ieee_mode = 1
3715; GFX10-NEXT:     enable_wgp_mode = 1
3716; GFX10-NEXT:     enable_mem_ordered = 1
3717; GFX10-NEXT:     enable_fwd_progress = 0
3718; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3719; GFX10-NEXT:     user_sgpr_count = 6
3720; GFX10-NEXT:     enable_trap_handler = 0
3721; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
3722; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 0
3723; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 0
3724; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
3725; GFX10-NEXT:     enable_vgpr_workitem_id = 0
3726; GFX10-NEXT:     enable_exception_msb = 0
3727; GFX10-NEXT:     granulated_lds_size = 0
3728; GFX10-NEXT:     enable_exception = 0
3729; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
3730; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 0
3731; GFX10-NEXT:     enable_sgpr_queue_ptr = 0
3732; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3733; GFX10-NEXT:     enable_sgpr_dispatch_id = 0
3734; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
3735; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
3736; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3737; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3738; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3739; GFX10-NEXT:     enable_wavefront_size32 = 1
3740; GFX10-NEXT:     enable_ordered_append_gds = 0
3741; GFX10-NEXT:     private_element_size = 1
3742; GFX10-NEXT:     is_ptr64 = 1
3743; GFX10-NEXT:     is_dynamic_callstack = 0
3744; GFX10-NEXT:     is_debug_enabled = 0
3745; GFX10-NEXT:     is_xnack_enabled = 1
3746; GFX10-NEXT:     workitem_private_segment_byte_size = 0
3747; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
3748; GFX10-NEXT:     gds_segment_byte_size = 0
3749; GFX10-NEXT:     kernarg_segment_byte_size = 12
3750; GFX10-NEXT:     workgroup_fbarrier_count = 0
3751; GFX10-NEXT:     wavefront_sgpr_count = 6
3752; GFX10-NEXT:     workitem_vgpr_count = 2
3753; GFX10-NEXT:     reserved_vgpr_first = 0
3754; GFX10-NEXT:     reserved_vgpr_count = 0
3755; GFX10-NEXT:     reserved_sgpr_first = 0
3756; GFX10-NEXT:     reserved_sgpr_count = 0
3757; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3758; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
3759; GFX10-NEXT:     kernarg_segment_alignment = 4
3760; GFX10-NEXT:     group_segment_alignment = 4
3761; GFX10-NEXT:     private_segment_alignment = 4
3762; GFX10-NEXT:     wavefront_size = 5
3763; GFX10-NEXT:     call_convention = -1
3764; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
3765; GFX10-NEXT:    .end_amd_kernel_code_t
3766; GFX10-NEXT:  ; %bb.0: ; %entry
3767; GFX10-NEXT:    s_clause 0x1
3768; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8
3769; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3770; GFX10-NEXT:    v_mov_b32_e32 v1, 0
3771; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3772; GFX10-NEXT:    s_cmp_eq_u32 s2, 1
3773; GFX10-NEXT:    s_cselect_b32 s3, 2.0, 1.0
3774; GFX10-NEXT:    s_cmp_eq_u32 s2, 2
3775; GFX10-NEXT:    s_cselect_b32 s3, 0x40400000, s3
3776; GFX10-NEXT:    s_cmp_eq_u32 s2, 3
3777; GFX10-NEXT:    s_cselect_b32 s2, 4.0, s3
3778; GFX10-NEXT:    v_mov_b32_e32 v0, s2
3779; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
3780; GFX10-NEXT:    s_endpgm
3781entry:
3782  %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel
3783  store float %ext, float addrspace(1)* %out
3784  ret void
3785}
3786
3787define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i32 %sel) {
3788; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s:
3789; GPRIDX:         .amd_kernel_code_t
3790; GPRIDX-NEXT:     amd_code_version_major = 1
3791; GPRIDX-NEXT:     amd_code_version_minor = 2
3792; GPRIDX-NEXT:     amd_machine_kind = 1
3793; GPRIDX-NEXT:     amd_machine_version_major = 9
3794; GPRIDX-NEXT:     amd_machine_version_minor = 0
3795; GPRIDX-NEXT:     amd_machine_version_stepping = 0
3796; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
3797; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
3798; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
3799; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 0
3800; GPRIDX-NEXT:     priority = 0
3801; GPRIDX-NEXT:     float_mode = 240
3802; GPRIDX-NEXT:     priv = 0
3803; GPRIDX-NEXT:     enable_dx10_clamp = 1
3804; GPRIDX-NEXT:     debug_mode = 0
3805; GPRIDX-NEXT:     enable_ieee_mode = 1
3806; GPRIDX-NEXT:     enable_wgp_mode = 0
3807; GPRIDX-NEXT:     enable_mem_ordered = 0
3808; GPRIDX-NEXT:     enable_fwd_progress = 0
3809; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3810; GPRIDX-NEXT:     user_sgpr_count = 6
3811; GPRIDX-NEXT:     enable_trap_handler = 0
3812; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
3813; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 0
3814; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 0
3815; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
3816; GPRIDX-NEXT:     enable_vgpr_workitem_id = 0
3817; GPRIDX-NEXT:     enable_exception_msb = 0
3818; GPRIDX-NEXT:     granulated_lds_size = 0
3819; GPRIDX-NEXT:     enable_exception = 0
3820; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
3821; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 0
3822; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 0
3823; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3824; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 0
3825; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
3826; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
3827; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3828; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3829; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3830; GPRIDX-NEXT:     enable_wavefront_size32 = 0
3831; GPRIDX-NEXT:     enable_ordered_append_gds = 0
3832; GPRIDX-NEXT:     private_element_size = 1
3833; GPRIDX-NEXT:     is_ptr64 = 1
3834; GPRIDX-NEXT:     is_dynamic_callstack = 0
3835; GPRIDX-NEXT:     is_debug_enabled = 0
3836; GPRIDX-NEXT:     is_xnack_enabled = 1
3837; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
3838; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
3839; GPRIDX-NEXT:     gds_segment_byte_size = 0
3840; GPRIDX-NEXT:     kernarg_segment_byte_size = 12
3841; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
3842; GPRIDX-NEXT:     wavefront_sgpr_count = 7
3843; GPRIDX-NEXT:     workitem_vgpr_count = 3
3844; GPRIDX-NEXT:     reserved_vgpr_first = 0
3845; GPRIDX-NEXT:     reserved_vgpr_count = 0
3846; GPRIDX-NEXT:     reserved_sgpr_first = 0
3847; GPRIDX-NEXT:     reserved_sgpr_count = 0
3848; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3849; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
3850; GPRIDX-NEXT:     kernarg_segment_alignment = 4
3851; GPRIDX-NEXT:     group_segment_alignment = 4
3852; GPRIDX-NEXT:     private_segment_alignment = 4
3853; GPRIDX-NEXT:     wavefront_size = 6
3854; GPRIDX-NEXT:     call_convention = -1
3855; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
3856; GPRIDX-NEXT:    .end_amd_kernel_code_t
3857; GPRIDX-NEXT:  ; %bb.0: ; %entry
3858; GPRIDX-NEXT:    s_load_dword s6, s[4:5], 0x8
3859; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3860; GPRIDX-NEXT:    s_mov_b32 s2, 0
3861; GPRIDX-NEXT:    s_mov_b32 s3, 0x40080000
3862; GPRIDX-NEXT:    v_mov_b32_e32 v2, 0
3863; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
3864; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 1
3865; GPRIDX-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
3866; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 2
3867; GPRIDX-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3868; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 3
3869; GPRIDX-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
3870; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
3871; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
3872; GPRIDX-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
3873; GPRIDX-NEXT:    s_endpgm
3874;
3875; MOVREL-LABEL: dyn_extract_v4f64_s_s_s:
3876; MOVREL:         .amd_kernel_code_t
3877; MOVREL-NEXT:     amd_code_version_major = 1
3878; MOVREL-NEXT:     amd_code_version_minor = 2
3879; MOVREL-NEXT:     amd_machine_kind = 1
3880; MOVREL-NEXT:     amd_machine_version_major = 8
3881; MOVREL-NEXT:     amd_machine_version_minor = 0
3882; MOVREL-NEXT:     amd_machine_version_stepping = 3
3883; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
3884; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
3885; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
3886; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 0
3887; MOVREL-NEXT:     priority = 0
3888; MOVREL-NEXT:     float_mode = 240
3889; MOVREL-NEXT:     priv = 0
3890; MOVREL-NEXT:     enable_dx10_clamp = 1
3891; MOVREL-NEXT:     debug_mode = 0
3892; MOVREL-NEXT:     enable_ieee_mode = 1
3893; MOVREL-NEXT:     enable_wgp_mode = 0
3894; MOVREL-NEXT:     enable_mem_ordered = 0
3895; MOVREL-NEXT:     enable_fwd_progress = 0
3896; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3897; MOVREL-NEXT:     user_sgpr_count = 6
3898; MOVREL-NEXT:     enable_trap_handler = 0
3899; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
3900; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 0
3901; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 0
3902; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
3903; MOVREL-NEXT:     enable_vgpr_workitem_id = 0
3904; MOVREL-NEXT:     enable_exception_msb = 0
3905; MOVREL-NEXT:     granulated_lds_size = 0
3906; MOVREL-NEXT:     enable_exception = 0
3907; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
3908; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 0
3909; MOVREL-NEXT:     enable_sgpr_queue_ptr = 0
3910; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3911; MOVREL-NEXT:     enable_sgpr_dispatch_id = 0
3912; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
3913; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
3914; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3915; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3916; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3917; MOVREL-NEXT:     enable_wavefront_size32 = 0
3918; MOVREL-NEXT:     enable_ordered_append_gds = 0
3919; MOVREL-NEXT:     private_element_size = 1
3920; MOVREL-NEXT:     is_ptr64 = 1
3921; MOVREL-NEXT:     is_dynamic_callstack = 0
3922; MOVREL-NEXT:     is_debug_enabled = 0
3923; MOVREL-NEXT:     is_xnack_enabled = 0
3924; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
3925; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
3926; MOVREL-NEXT:     gds_segment_byte_size = 0
3927; MOVREL-NEXT:     kernarg_segment_byte_size = 12
3928; MOVREL-NEXT:     workgroup_fbarrier_count = 0
3929; MOVREL-NEXT:     wavefront_sgpr_count = 7
3930; MOVREL-NEXT:     workitem_vgpr_count = 4
3931; MOVREL-NEXT:     reserved_vgpr_first = 0
3932; MOVREL-NEXT:     reserved_vgpr_count = 0
3933; MOVREL-NEXT:     reserved_sgpr_first = 0
3934; MOVREL-NEXT:     reserved_sgpr_count = 0
3935; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3936; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
3937; MOVREL-NEXT:     kernarg_segment_alignment = 4
3938; MOVREL-NEXT:     group_segment_alignment = 4
3939; MOVREL-NEXT:     private_segment_alignment = 4
3940; MOVREL-NEXT:     wavefront_size = 6
3941; MOVREL-NEXT:     call_convention = -1
3942; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
3943; MOVREL-NEXT:    .end_amd_kernel_code_t
3944; MOVREL-NEXT:  ; %bb.0: ; %entry
3945; MOVREL-NEXT:    s_load_dword s6, s[4:5], 0x8
3946; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3947; MOVREL-NEXT:    s_mov_b32 s2, 0
3948; MOVREL-NEXT:    s_mov_b32 s3, 0x40080000
3949; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
3950; MOVREL-NEXT:    s_cmp_eq_u32 s6, 1
3951; MOVREL-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
3952; MOVREL-NEXT:    s_cmp_eq_u32 s6, 2
3953; MOVREL-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3954; MOVREL-NEXT:    s_cmp_eq_u32 s6, 3
3955; MOVREL-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
3956; MOVREL-NEXT:    v_mov_b32_e32 v0, s2
3957; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
3958; MOVREL-NEXT:    v_mov_b32_e32 v1, s3
3959; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
3960; MOVREL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3961; MOVREL-NEXT:    s_endpgm
3962;
3963; GFX10-LABEL: dyn_extract_v4f64_s_s_s:
3964; GFX10:         .amd_kernel_code_t
3965; GFX10-NEXT:     amd_code_version_major = 1
3966; GFX10-NEXT:     amd_code_version_minor = 2
3967; GFX10-NEXT:     amd_machine_kind = 1
3968; GFX10-NEXT:     amd_machine_version_major = 10
3969; GFX10-NEXT:     amd_machine_version_minor = 1
3970; GFX10-NEXT:     amd_machine_version_stepping = 0
3971; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
3972; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
3973; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
3974; GFX10-NEXT:     granulated_wavefront_sgpr_count = 0
3975; GFX10-NEXT:     priority = 0
3976; GFX10-NEXT:     float_mode = 240
3977; GFX10-NEXT:     priv = 0
3978; GFX10-NEXT:     enable_dx10_clamp = 1
3979; GFX10-NEXT:     debug_mode = 0
3980; GFX10-NEXT:     enable_ieee_mode = 1
3981; GFX10-NEXT:     enable_wgp_mode = 1
3982; GFX10-NEXT:     enable_mem_ordered = 1
3983; GFX10-NEXT:     enable_fwd_progress = 0
3984; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3985; GFX10-NEXT:     user_sgpr_count = 6
3986; GFX10-NEXT:     enable_trap_handler = 0
3987; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
3988; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 0
3989; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 0
3990; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
3991; GFX10-NEXT:     enable_vgpr_workitem_id = 0
3992; GFX10-NEXT:     enable_exception_msb = 0
3993; GFX10-NEXT:     granulated_lds_size = 0
3994; GFX10-NEXT:     enable_exception = 0
3995; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
3996; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 0
3997; GFX10-NEXT:     enable_sgpr_queue_ptr = 0
3998; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3999; GFX10-NEXT:     enable_sgpr_dispatch_id = 0
4000; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
4001; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
4002; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4003; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4004; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4005; GFX10-NEXT:     enable_wavefront_size32 = 1
4006; GFX10-NEXT:     enable_ordered_append_gds = 0
4007; GFX10-NEXT:     private_element_size = 1
4008; GFX10-NEXT:     is_ptr64 = 1
4009; GFX10-NEXT:     is_dynamic_callstack = 0
4010; GFX10-NEXT:     is_debug_enabled = 0
4011; GFX10-NEXT:     is_xnack_enabled = 1
4012; GFX10-NEXT:     workitem_private_segment_byte_size = 0
4013; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
4014; GFX10-NEXT:     gds_segment_byte_size = 0
4015; GFX10-NEXT:     kernarg_segment_byte_size = 12
4016; GFX10-NEXT:     workgroup_fbarrier_count = 0
4017; GFX10-NEXT:     wavefront_sgpr_count = 7
4018; GFX10-NEXT:     workitem_vgpr_count = 3
4019; GFX10-NEXT:     reserved_vgpr_first = 0
4020; GFX10-NEXT:     reserved_vgpr_count = 0
4021; GFX10-NEXT:     reserved_sgpr_first = 0
4022; GFX10-NEXT:     reserved_sgpr_count = 0
4023; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4024; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
4025; GFX10-NEXT:     kernarg_segment_alignment = 4
4026; GFX10-NEXT:     group_segment_alignment = 4
4027; GFX10-NEXT:     private_segment_alignment = 4
4028; GFX10-NEXT:     wavefront_size = 5
4029; GFX10-NEXT:     call_convention = -1
4030; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
4031; GFX10-NEXT:    .end_amd_kernel_code_t
4032; GFX10-NEXT:  ; %bb.0: ; %entry
4033; GFX10-NEXT:    s_clause 0x1
4034; GFX10-NEXT:    s_load_dword s6, s[4:5], 0x8
4035; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4036; GFX10-NEXT:    s_mov_b32 s2, 0
4037; GFX10-NEXT:    s_mov_b32 s3, 0x40080000
4038; GFX10-NEXT:    v_mov_b32_e32 v2, 0
4039; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
4040; GFX10-NEXT:    s_cmp_eq_u32 s6, 1
4041; GFX10-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
4042; GFX10-NEXT:    s_cmp_eq_u32 s6, 2
4043; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
4044; GFX10-NEXT:    s_cmp_eq_u32 s6, 3
4045; GFX10-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
4046; GFX10-NEXT:    v_mov_b32_e32 v0, s2
4047; GFX10-NEXT:    v_mov_b32_e32 v1, s3
4048; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
4049; GFX10-NEXT:    s_endpgm
4050entry:
4051  %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel
4052  store double %ext, double addrspace(1)* %out
4053  ret void
4054}
4055
4056define i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) {
4057; GPRIDX-LABEL: v_extract_v64i32_7:
4058; GPRIDX:       ; %bb.0:
4059; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4060; GPRIDX-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:16
4061; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4062; GPRIDX-NEXT:    v_mov_b32_e32 v0, v7
4063; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4064;
4065; MOVREL-LABEL: v_extract_v64i32_7:
4066; MOVREL:       ; %bb.0:
4067; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4068; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
4069; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4070; MOVREL-NEXT:    flat_load_dwordx4 v[4:7], v[0:1]
4071; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4072; MOVREL-NEXT:    v_mov_b32_e32 v0, v7
4073; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4074;
4075; GFX10-LABEL: v_extract_v64i32_7:
4076; GFX10:       ; %bb.0:
4077; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4078; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4079; GFX10-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:16
4080; GFX10-NEXT:    s_waitcnt vmcnt(0)
4081; GFX10-NEXT:    v_mov_b32_e32 v0, v7
4082; GFX10-NEXT:    s_setpc_b64 s[30:31]
4083  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
4084  %elt = extractelement <64 x i32> %vec, i32 7
4085  ret i32 %elt
4086}
4087
4088define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) {
4089; GPRIDX-LABEL: v_extract_v64i32_32:
4090; GPRIDX:       ; %bb.0:
4091; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4092; GPRIDX-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
4093; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4094; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4095;
4096; MOVREL-LABEL: v_extract_v64i32_32:
4097; MOVREL:       ; %bb.0:
4098; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4099; MOVREL-NEXT:    s_mov_b64 s[4:5], 0x80
4100; MOVREL-NEXT:    v_mov_b32_e32 v2, s4
4101; MOVREL-NEXT:    v_mov_b32_e32 v3, s5
4102; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
4103; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
4104; MOVREL-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4105; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4106; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4107;
4108; GFX10-LABEL: v_extract_v64i32_32:
4109; GFX10:       ; %bb.0:
4110; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4111; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4112; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
4113; GFX10-NEXT:    s_waitcnt vmcnt(0)
4114; GFX10-NEXT:    s_setpc_b64 s[30:31]
4115  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
4116  %elt = extractelement <64 x i32> %vec, i32 32
4117  ret i32 %elt
4118}
4119
4120define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) {
4121; GPRIDX-LABEL: v_extract_v64i32_33:
4122; GPRIDX:       ; %bb.0:
4123; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4124; GPRIDX-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
4125; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4126; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
4127; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4128;
4129; MOVREL-LABEL: v_extract_v64i32_33:
4130; MOVREL:       ; %bb.0:
4131; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4132; MOVREL-NEXT:    s_mov_b64 s[4:5], 0x80
4133; MOVREL-NEXT:    v_mov_b32_e32 v2, s4
4134; MOVREL-NEXT:    v_mov_b32_e32 v3, s5
4135; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
4136; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
4137; MOVREL-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4138; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4139; MOVREL-NEXT:    v_mov_b32_e32 v0, v1
4140; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4141;
4142; GFX10-LABEL: v_extract_v64i32_33:
4143; GFX10:       ; %bb.0:
4144; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4145; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4146; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
4147; GFX10-NEXT:    s_waitcnt vmcnt(0)
4148; GFX10-NEXT:    v_mov_b32_e32 v0, v1
4149; GFX10-NEXT:    s_setpc_b64 s[30:31]
4150  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
4151  %elt = extractelement <64 x i32> %vec, i32 33
4152  ret i32 %elt
4153}
4154
4155define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) {
4156; GPRIDX-LABEL: v_extract_v64i32_37:
4157; GPRIDX:       ; %bb.0:
4158; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4159; GPRIDX-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:144
4160; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4161; GPRIDX-NEXT:    v_mov_b32_e32 v0, v5
4162; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4163;
4164; MOVREL-LABEL: v_extract_v64i32_37:
4165; MOVREL:       ; %bb.0:
4166; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4167; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x90, v0
4168; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4169; MOVREL-NEXT:    flat_load_dwordx4 v[4:7], v[0:1]
4170; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4171; MOVREL-NEXT:    v_mov_b32_e32 v0, v5
4172; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4173;
4174; GFX10-LABEL: v_extract_v64i32_37:
4175; GFX10:       ; %bb.0:
4176; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4177; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4178; GFX10-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:144
4179; GFX10-NEXT:    s_waitcnt vmcnt(0)
4180; GFX10-NEXT:    v_mov_b32_e32 v0, v5
4181; GFX10-NEXT:    s_setpc_b64 s[30:31]
4182  %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr
4183  %elt = extractelement <64 x i32> %vec, i32 37
4184  ret i32 %elt
4185}
4186