1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
6
; Dynamic extractelement from the constant vector <1.0, 2.0, ..., 8.0>
; (<8 x float>) using the plain i32 argument %sel as the index.
; Uses the default calling convention. In the expected code for every target
; the index arrives in v0 and is compared against 1..7; each compare feeds a
; v_cndmask_b32, with element 0 (1.0) acting as the fall-through value.
; The gfx900 and fiji runs share the GCN body (constants that are not inline
; immediates are first moved into VGPRs); the gfx1010 and gfx1100 runs share
; the GFX10PLUS body (literal constants are used directly in v_cndmask_b32).
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with the update script instead of editing by hand.
7define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
8; GCN-LABEL: dyn_extract_v8f32_const_s_v:
9; GCN:       ; %bb.0: ; %entry
10; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
12; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
13; GCN-NEXT:    v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
14; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
15; GCN-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
16; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
17; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
18; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
19; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
20; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
21; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
22; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
23; GCN-NEXT:    v_mov_b32_e32 v4, 0x40e00000
24; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
25; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
26; GCN-NEXT:    v_mov_b32_e32 v5, 0x41000000
27; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
28; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
29; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
30; GCN-NEXT:    s_setpc_b64 s[30:31]
31;
32; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v:
33; GFX10PLUS:       ; %bb.0: ; %entry
34; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
36; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
37; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
38; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
39; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
40; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
41; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
42; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
43; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
44; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
45; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
46; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
47; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
48; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
49; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
50; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
51entry:
52  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
53  ret float %ext
54}
55
; Dynamic extractelement from the constant vector <1.0, 2.0, ..., 8.0>
; (<8 x float>) in an amdgpu_ps function whose index %sel is an inreg i32.
; In the expected code the index arrives in s2.
;  * GPRIDX (gfx900) expects a scalar s_cmp_eq_u32 / s_cselect_b32 chain over
;    indices 1..7, starting from element 0 (1.0).
;  * MOVREL (fiji) and GFX10PLUS (gfx1010, gfx1100) expect the eight constants
;    to be materialized in s4..s11, m0 to be loaded from s2, and the element
;    to be fetched with s_movrels_b32.
; In every variant the scalar result is copied to v0 for the return value.
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with the update script instead of editing by hand.
56define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
57; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
58; GPRIDX:       ; %bb.0: ; %entry
59; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 1
60; GPRIDX-NEXT:    s_cselect_b32 s0, 2.0, 1.0
61; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 2
62; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40400000, s0
63; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 3
64; GPRIDX-NEXT:    s_cselect_b32 s0, 4.0, s0
65; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 4
66; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40a00000, s0
67; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 5
68; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40c00000, s0
69; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 6
70; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40e00000, s0
71; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 7
72; GPRIDX-NEXT:    s_cselect_b32 s0, 0x41000000, s0
73; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
74; GPRIDX-NEXT:    ; return to shader part epilog
75;
76; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
77; MOVREL:       ; %bb.0: ; %entry
78; MOVREL-NEXT:    s_mov_b32 s4, 1.0
79; MOVREL-NEXT:    s_mov_b32 m0, s2
80; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
81; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
82; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
83; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
84; MOVREL-NEXT:    s_mov_b32 s7, 4.0
85; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
86; MOVREL-NEXT:    s_mov_b32 s5, 2.0
87; MOVREL-NEXT:    s_movrels_b32 s0, s4
88; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
89; MOVREL-NEXT:    ; return to shader part epilog
90;
91; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_s:
92; GFX10PLUS:       ; %bb.0: ; %entry
93; GFX10PLUS-NEXT:    s_mov_b32 s4, 1.0
94; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
95; GFX10PLUS-NEXT:    s_mov_b32 s11, 0x41000000
96; GFX10PLUS-NEXT:    s_mov_b32 s10, 0x40e00000
97; GFX10PLUS-NEXT:    s_mov_b32 s9, 0x40c00000
98; GFX10PLUS-NEXT:    s_mov_b32 s8, 0x40a00000
99; GFX10PLUS-NEXT:    s_mov_b32 s7, 4.0
100; GFX10PLUS-NEXT:    s_mov_b32 s6, 0x40400000
101; GFX10PLUS-NEXT:    s_mov_b32 s5, 2.0
102; GFX10PLUS-NEXT:    s_movrels_b32 s0, s4
103; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
104; GFX10PLUS-NEXT:    ; return to shader part epilog
105entry:
106  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
107  ret float %ext
108}
109
; Dynamic extractelement from an inreg <8 x float> argument %vec, indexed by
; the plain (non-inreg) i32 argument %sel, in an amdgpu_ps function.
; In the expected code the vector arrives in s2..s9 and the index in v0. The
; index is compared against 1..7 and the result is built with a
; v_cndmask_b32 chain.
;  * GCN (gfx900 and fiji) copies each scalar element into a VGPR before the
;    select.
;  * GFX10PLUS (gfx1010 and gfx1100) uses the scalar registers directly as
;    v_cndmask_b32 operands for all but the first select.
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with the update script instead of editing by hand.
110define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
111; GCN-LABEL: dyn_extract_v8f32_s_v:
112; GCN:       ; %bb.0: ; %entry
113; GCN-NEXT:    s_mov_b32 s0, s2
114; GCN-NEXT:    s_mov_b32 s1, s3
115; GCN-NEXT:    s_mov_b32 s2, s4
116; GCN-NEXT:    v_mov_b32_e32 v1, s0
117; GCN-NEXT:    v_mov_b32_e32 v2, s1
118; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
119; GCN-NEXT:    s_mov_b32 s3, s5
120; GCN-NEXT:    v_mov_b32_e32 v3, s2
121; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
122; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
123; GCN-NEXT:    v_mov_b32_e32 v4, s3
124; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
125; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
126; GCN-NEXT:    v_mov_b32_e32 v5, s6
127; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
128; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
129; GCN-NEXT:    v_mov_b32_e32 v6, s7
130; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
131; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
132; GCN-NEXT:    v_mov_b32_e32 v7, s8
133; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
134; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
135; GCN-NEXT:    v_mov_b32_e32 v8, s9
136; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
137; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
138; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v8, vcc
139; GCN-NEXT:    ; return to shader part epilog
140;
141; GFX10PLUS-LABEL: dyn_extract_v8f32_s_v:
142; GFX10PLUS:       ; %bb.0: ; %entry
143; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
144; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
145; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, s1
146; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
147; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
148; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
149; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
150; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
151; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
152; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
153; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
154; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
155; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
156; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
157; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
158; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
159; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
160; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
161; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
162; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
163; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
164; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
165; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, s7, vcc_lo
166; GFX10PLUS-NEXT:    ; return to shader part epilog
167entry:
168  %ext = extractelement <8 x float> %vec, i32 %sel
169  ret float %ext
170}
171
; Dynamic extractelement from an <8 x float> argument %vec, indexed by the
; i32 argument %sel, using the default calling convention (no inreg
; arguments).
; In the expected code the vector arrives in v0..v7 and the index in v8. The
; index is compared against 1..7 and v0 is updated in place through a
; v_cndmask_b32 chain, so element 0 is the fall-through value.
; The GCN body (gfx900 and fiji) and the GFX10PLUS body (gfx1010 and gfx1100)
; differ only in vcc versus vcc_lo and the extra s_waitcnt_vscnt.
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with the update script instead of editing by hand.
172define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
173; GCN-LABEL: dyn_extract_v8f32_v_v:
174; GCN:       ; %bb.0: ; %entry
175; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
177; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
178; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
179; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
180; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
181; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
182; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
183; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
184; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
185; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
186; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
187; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
188; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
189; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
190; GCN-NEXT:    s_setpc_b64 s[30:31]
191;
192; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v:
193; GFX10PLUS:       ; %bb.0: ; %entry
194; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
196; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
197; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
198; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
199; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
200; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
201; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
202; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
203; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
204; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
205; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
206; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
207; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
208; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
209; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
210; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
211entry:
212  %ext = extractelement <8 x float> %vec, i32 %sel
213  ret float %ext
214}
215
; Dynamic extractelement from a non-inreg <8 x float> argument %vec, indexed
; by an inreg i32 argument %sel, in an amdgpu_ps function.
; In the expected code the vector arrives in v0..v7 and the index in s2.
;  * GPRIDX (gfx900) expects a compare-and-select chain: v_cmp_eq_u32_e64 of
;    s2 against 1..7, each followed by a v_cndmask_b32 into v0.
;  * MOVREL (fiji) and GFX10PLUS (gfx1010, gfx1100) expect m0 to be loaded
;    from s2 followed by a single v_movrels_b32.
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with the update script instead of editing by hand.
216define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
217; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
218; GPRIDX:       ; %bb.0: ; %entry
219; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
220; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
221; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
222; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
223; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
224; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
225; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
226; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
227; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
228; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
229; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
230; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
231; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
232; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
233; GPRIDX-NEXT:    ; return to shader part epilog
234;
235; MOVREL-LABEL: dyn_extract_v8f32_v_s:
236; MOVREL:       ; %bb.0: ; %entry
237; MOVREL-NEXT:    s_mov_b32 m0, s2
238; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
239; MOVREL-NEXT:    ; return to shader part epilog
240;
241; GFX10PLUS-LABEL: dyn_extract_v8f32_v_s:
242; GFX10PLUS:       ; %bb.0: ; %entry
243; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
244; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
245; GFX10PLUS-NEXT:    ; return to shader part epilog
246entry:
247  %ext = extractelement <8 x float> %vec, i32 %sel
248  ret float %ext
249}
250
; Dynamic extractelement from an inreg <8 x float> argument %vec, indexed by
; an inreg i32 argument %sel, in an amdgpu_ps function.
; In the expected code the vector arrives in s2..s9 and the index in s10.
;  * GPRIDX (gfx900) expects a scalar s_cmp_eq_u32 / s_cselect_b32 chain over
;    indices 1..7, starting from s2 (element 0).
;  * MOVREL (fiji) and GFX10PLUS (gfx1010, gfx1100) expect the vector to be
;    copied down to s0..s7, m0 to be loaded from s10, and the element to be
;    fetched with s_movrels_b32.
; In every variant the scalar result is copied to v0 for the return value.
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with the update script instead of editing by hand.
251define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
252; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
253; GPRIDX:       ; %bb.0: ; %entry
254; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
255; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
256; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
257; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
258; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
259; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
260; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
261; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
262; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
263; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
264; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
265; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
266; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
267; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
268; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
269; GPRIDX-NEXT:    ; return to shader part epilog
270;
271; MOVREL-LABEL: dyn_extract_v8f32_s_s:
272; MOVREL:       ; %bb.0: ; %entry
273; MOVREL-NEXT:    s_mov_b32 s0, s2
274; MOVREL-NEXT:    s_mov_b32 m0, s10
275; MOVREL-NEXT:    s_mov_b32 s1, s3
276; MOVREL-NEXT:    s_mov_b32 s2, s4
277; MOVREL-NEXT:    s_mov_b32 s3, s5
278; MOVREL-NEXT:    s_mov_b32 s4, s6
279; MOVREL-NEXT:    s_mov_b32 s5, s7
280; MOVREL-NEXT:    s_mov_b32 s6, s8
281; MOVREL-NEXT:    s_mov_b32 s7, s9
282; MOVREL-NEXT:    s_movrels_b32 s0, s0
283; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
284; MOVREL-NEXT:    ; return to shader part epilog
285;
286; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s:
287; GFX10PLUS:       ; %bb.0: ; %entry
288; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
289; GFX10PLUS-NEXT:    s_mov_b32 m0, s10
290; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
291; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
292; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
293; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
294; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
295; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
296; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
297; GFX10PLUS-NEXT:    s_movrels_b32 s0, s0
298; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
299; GFX10PLUS-NEXT:    ; return to shader part epilog
300entry:
301  %ext = extractelement <8 x float> %vec, i32 %sel
302  ret float %ext
303}
304
; Dynamic extractelement from the constant vector <1, 2, ..., 8> (<8 x i64>)
; using the plain i32 argument %sel as the index; default calling convention.
; In the expected code the index arrives in v0. Each 64-bit constant is
; materialized with s_mov_b64 into a scalar register pair, and the element is
; selected as two 32-bit halves: every compare of v0 against 1..7 is followed
; by a pair of v_cndmask_b32 (low half into v1, high half into v2). The final
; pair writes the result to v0/v1.
; The gfx900 and fiji runs share the GCN body. gfx1010 (GFX10) and gfx1100
; (GFX11) have separate bodies here; they differ in the scalar registers used
; and in GFX11's use of v_dual_mov_b32.
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with the update script instead of editing by hand.
305define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
306; GCN-LABEL: dyn_extract_v8i64_const_s_v:
307; GCN:       ; %bb.0: ; %entry
308; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309; GCN-NEXT:    s_mov_b64 s[4:5], 1
310; GCN-NEXT:    s_mov_b64 s[6:7], 2
311; GCN-NEXT:    v_mov_b32_e32 v1, s4
312; GCN-NEXT:    v_mov_b32_e32 v2, s5
313; GCN-NEXT:    v_mov_b32_e32 v3, s6
314; GCN-NEXT:    v_mov_b32_e32 v4, s7
315; GCN-NEXT:    s_mov_b64 s[8:9], 3
316; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
317; GCN-NEXT:    v_mov_b32_e32 v5, s8
318; GCN-NEXT:    v_mov_b32_e32 v6, s9
319; GCN-NEXT:    s_mov_b64 s[10:11], 4
320; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
321; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
322; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
323; GCN-NEXT:    v_mov_b32_e32 v7, s10
324; GCN-NEXT:    v_mov_b32_e32 v8, s11
325; GCN-NEXT:    s_mov_b64 s[12:13], 5
326; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
327; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
328; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
329; GCN-NEXT:    s_mov_b64 s[14:15], 6
330; GCN-NEXT:    v_mov_b32_e32 v9, s12
331; GCN-NEXT:    v_mov_b32_e32 v10, s13
332; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
333; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
334; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
335; GCN-NEXT:    s_mov_b64 s[16:17], 7
336; GCN-NEXT:    v_mov_b32_e32 v11, s14
337; GCN-NEXT:    v_mov_b32_e32 v12, s15
338; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
339; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
340; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
341; GCN-NEXT:    s_mov_b64 s[18:19], 8
342; GCN-NEXT:    v_mov_b32_e32 v13, s16
343; GCN-NEXT:    v_mov_b32_e32 v14, s17
344; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
345; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
346; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
347; GCN-NEXT:    v_mov_b32_e32 v15, s18
348; GCN-NEXT:    v_mov_b32_e32 v16, s19
349; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
350; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
351; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
352; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
353; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
354; GCN-NEXT:    s_setpc_b64 s[30:31]
355;
356; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
357; GFX10:       ; %bb.0: ; %entry
358; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
360; GFX10-NEXT:    s_mov_b64 s[6:7], 2
361; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
362; GFX10-NEXT:    v_mov_b32_e32 v1, s6
363; GFX10-NEXT:    v_mov_b32_e32 v2, s7
364; GFX10-NEXT:    s_mov_b64 s[4:5], 1
365; GFX10-NEXT:    s_mov_b64 s[8:9], 3
366; GFX10-NEXT:    s_mov_b64 s[10:11], 4
367; GFX10-NEXT:    v_cndmask_b32_e32 v1, s4, v1, vcc_lo
368; GFX10-NEXT:    v_cndmask_b32_e32 v2, s5, v2, vcc_lo
369; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
370; GFX10-NEXT:    s_mov_b64 s[12:13], 5
371; GFX10-NEXT:    s_mov_b64 s[14:15], 6
372; GFX10-NEXT:    s_mov_b64 s[16:17], 7
373; GFX10-NEXT:    s_mov_b64 s[18:19], 8
374; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
375; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
376; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
377; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
378; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
379; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
380; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
381; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
382; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
383; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s14, vcc_lo
384; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
385; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
386; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s16, vcc_lo
387; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s17, vcc_lo
388; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
389; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s18, vcc_lo
390; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s19, vcc_lo
391; GFX10-NEXT:    s_setpc_b64 s[30:31]
392;
393; GFX11-LABEL: dyn_extract_v8i64_const_s_v:
394; GFX11:       ; %bb.0: ; %entry
395; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
397; GFX11-NEXT:    s_mov_b64 s[2:3], 2
398; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
399; GFX11-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
400; GFX11-NEXT:    s_mov_b64 s[0:1], 1
401; GFX11-NEXT:    s_mov_b64 s[4:5], 3
402; GFX11-NEXT:    s_mov_b64 s[6:7], 4
403; GFX11-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
404; GFX11-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
405; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
406; GFX11-NEXT:    s_mov_b64 s[8:9], 5
407; GFX11-NEXT:    s_mov_b64 s[10:11], 6
408; GFX11-NEXT:    s_mov_b64 s[12:13], 7
409; GFX11-NEXT:    s_mov_b64 s[14:15], 8
410; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
411; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
412; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
413; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
414; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
415; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
416; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
417; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
418; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
419; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
420; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
421; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
422; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
423; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
424; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
425; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s14, vcc_lo
426; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s15, vcc_lo
427; GFX11-NEXT:    s_setpc_b64 s[30:31]
428entry:
429  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
430  ret i64 %ext
431}
432
; Dynamic extractelement from the constant vector <1, 2, ..., 8> (<8 x i64>)
; in an amdgpu_ps function whose index %sel is an inreg i32. The extracted
; i64 is stored through an undef addrspace(1) pointer so the value is used;
; the function returns void.
; In the expected code for all four targets the index arrives in s2 and is
; copied to m0, the constants are materialized in s[4:5]..s[18:19], and the
; element is fetched with s_movrels_b64. The bodies differ only in the store
; that follows: global_store_dwordx2 (gfx900, gfx1010), flat_store_dwordx2
; (fiji), and global_store_b64 plus an s_sendmsg before s_endpgm (gfx1100).
; NOTE(review): the store below uses the typed-pointer spelling
; `i64 addrspace(1)*`; newer LLVM releases are understood to accept only the
; opaque `ptr addrspace(1)` form - confirm against the LLVM revision this
; test targets and convert the whole file at once if needed.
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with the update script instead of editing by hand.
433define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
434; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
435; GPRIDX:       ; %bb.0: ; %entry
436; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1
437; GPRIDX-NEXT:    s_mov_b32 m0, s2
438; GPRIDX-NEXT:    s_mov_b64 s[18:19], 8
439; GPRIDX-NEXT:    s_mov_b64 s[16:17], 7
440; GPRIDX-NEXT:    s_mov_b64 s[14:15], 6
441; GPRIDX-NEXT:    s_mov_b64 s[12:13], 5
442; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4
443; GPRIDX-NEXT:    s_mov_b64 s[8:9], 3
444; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2
445; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[4:5]
446; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
447; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
448; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
449; GPRIDX-NEXT:    s_endpgm
450;
451; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
452; MOVREL:       ; %bb.0: ; %entry
453; MOVREL-NEXT:    s_mov_b64 s[4:5], 1
454; MOVREL-NEXT:    s_mov_b32 m0, s2
455; MOVREL-NEXT:    s_mov_b64 s[18:19], 8
456; MOVREL-NEXT:    s_mov_b64 s[16:17], 7
457; MOVREL-NEXT:    s_mov_b64 s[14:15], 6
458; MOVREL-NEXT:    s_mov_b64 s[12:13], 5
459; MOVREL-NEXT:    s_mov_b64 s[10:11], 4
460; MOVREL-NEXT:    s_mov_b64 s[8:9], 3
461; MOVREL-NEXT:    s_mov_b64 s[6:7], 2
462; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[4:5]
463; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
464; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
465; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
466; MOVREL-NEXT:    s_endpgm
467;
468; GFX10-LABEL: dyn_extract_v8i64_const_s_s:
469; GFX10:       ; %bb.0: ; %entry
470; GFX10-NEXT:    s_mov_b64 s[4:5], 1
471; GFX10-NEXT:    s_mov_b32 m0, s2
472; GFX10-NEXT:    s_mov_b64 s[18:19], 8
473; GFX10-NEXT:    s_mov_b64 s[16:17], 7
474; GFX10-NEXT:    s_mov_b64 s[14:15], 6
475; GFX10-NEXT:    s_mov_b64 s[12:13], 5
476; GFX10-NEXT:    s_mov_b64 s[10:11], 4
477; GFX10-NEXT:    s_mov_b64 s[8:9], 3
478; GFX10-NEXT:    s_mov_b64 s[6:7], 2
479; GFX10-NEXT:    s_movrels_b64 s[0:1], s[4:5]
480; GFX10-NEXT:    v_mov_b32_e32 v0, s0
481; GFX10-NEXT:    v_mov_b32_e32 v1, s1
482; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
483; GFX10-NEXT:    s_endpgm
484;
485; GFX11-LABEL: dyn_extract_v8i64_const_s_s:
486; GFX11:       ; %bb.0: ; %entry
487; GFX11-NEXT:    s_mov_b64 s[4:5], 1
488; GFX11-NEXT:    s_mov_b32 m0, s2
489; GFX11-NEXT:    s_mov_b64 s[18:19], 8
490; GFX11-NEXT:    s_mov_b64 s[16:17], 7
491; GFX11-NEXT:    s_mov_b64 s[14:15], 6
492; GFX11-NEXT:    s_mov_b64 s[12:13], 5
493; GFX11-NEXT:    s_mov_b64 s[10:11], 4
494; GFX11-NEXT:    s_mov_b64 s[8:9], 3
495; GFX11-NEXT:    s_mov_b64 s[6:7], 2
496; GFX11-NEXT:    s_movrels_b64 s[0:1], s[4:5]
497; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
498; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
499; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
500; GFX11-NEXT:    s_endpgm
501entry:
502  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
503  store i64 %ext, i64 addrspace(1)* undef
504  ret void
505}
506
; Dynamic extractelement from an inreg <8 x i64> argument %vec, indexed by
; the plain (non-inreg) i32 argument %sel, in an amdgpu_ps function. The
; extracted i64 is stored through an undef addrspace(1) pointer so the value
; is used; the function returns void.
; In the expected code the vector arrives in s2..s17 and the index in v0.
; Each 64-bit element is selected as two 32-bit halves: every compare of v0
; against 1..7 is followed by a pair of v_cndmask_b32 (low half into v1, high
; half into v2), and the final pair writes the result to v0/v1 for the store.
;  * GPRIDX (gfx900) and MOVREL (fiji) copy the scalar halves into VGPRs
;    before each select; their bodies differ only in the store instruction
;    (global_store_dwordx2 versus flat_store_dwordx2).
;  * GFX10 (gfx1010) and GFX11 (gfx1100) use scalar registers directly as
;    v_cndmask_b32 operands after the first select; GFX11 additionally uses
;    v_dual_mov_b32, global_store_b64 and an s_sendmsg before s_endpgm.
; NOTE(review): the store below uses the typed-pointer spelling
; `i64 addrspace(1)*`; newer LLVM releases are understood to accept only the
; opaque `ptr addrspace(1)` form - confirm against the LLVM revision this
; test targets and convert the whole file at once if needed.
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with the update script instead of editing by hand.
507define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
508; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
509; GPRIDX:       ; %bb.0: ; %entry
510; GPRIDX-NEXT:    s_mov_b32 s0, s2
511; GPRIDX-NEXT:    s_mov_b32 s1, s3
512; GPRIDX-NEXT:    s_mov_b32 s2, s4
513; GPRIDX-NEXT:    s_mov_b32 s3, s5
514; GPRIDX-NEXT:    s_mov_b32 s4, s6
515; GPRIDX-NEXT:    s_mov_b32 s5, s7
516; GPRIDX-NEXT:    v_mov_b32_e32 v1, s0
517; GPRIDX-NEXT:    v_mov_b32_e32 v2, s1
518; GPRIDX-NEXT:    v_mov_b32_e32 v3, s2
519; GPRIDX-NEXT:    v_mov_b32_e32 v4, s3
520; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
521; GPRIDX-NEXT:    s_mov_b32 s6, s8
522; GPRIDX-NEXT:    s_mov_b32 s7, s9
523; GPRIDX-NEXT:    v_mov_b32_e32 v5, s4
524; GPRIDX-NEXT:    v_mov_b32_e32 v6, s5
525; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
526; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
527; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
528; GPRIDX-NEXT:    s_mov_b32 s8, s10
529; GPRIDX-NEXT:    s_mov_b32 s9, s11
530; GPRIDX-NEXT:    v_mov_b32_e32 v7, s6
531; GPRIDX-NEXT:    v_mov_b32_e32 v8, s7
532; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
533; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
534; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
535; GPRIDX-NEXT:    s_mov_b32 s10, s12
536; GPRIDX-NEXT:    s_mov_b32 s11, s13
537; GPRIDX-NEXT:    v_mov_b32_e32 v9, s8
538; GPRIDX-NEXT:    v_mov_b32_e32 v10, s9
539; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
540; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
541; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
542; GPRIDX-NEXT:    v_mov_b32_e32 v11, s10
543; GPRIDX-NEXT:    v_mov_b32_e32 v12, s11
544; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
545; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
546; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
547; GPRIDX-NEXT:    v_mov_b32_e32 v13, s14
548; GPRIDX-NEXT:    v_mov_b32_e32 v14, s15
549; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
550; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
551; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
552; GPRIDX-NEXT:    v_mov_b32_e32 v15, s16
553; GPRIDX-NEXT:    v_mov_b32_e32 v16, s17
554; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
555; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
556; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
557; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
558; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
559; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
560; GPRIDX-NEXT:    s_endpgm
561;
562; MOVREL-LABEL: dyn_extract_v8i64_s_v:
563; MOVREL:       ; %bb.0: ; %entry
564; MOVREL-NEXT:    s_mov_b32 s0, s2
565; MOVREL-NEXT:    s_mov_b32 s1, s3
566; MOVREL-NEXT:    s_mov_b32 s2, s4
567; MOVREL-NEXT:    s_mov_b32 s3, s5
568; MOVREL-NEXT:    s_mov_b32 s4, s6
569; MOVREL-NEXT:    s_mov_b32 s5, s7
570; MOVREL-NEXT:    v_mov_b32_e32 v1, s0
571; MOVREL-NEXT:    v_mov_b32_e32 v2, s1
572; MOVREL-NEXT:    v_mov_b32_e32 v3, s2
573; MOVREL-NEXT:    v_mov_b32_e32 v4, s3
574; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
575; MOVREL-NEXT:    s_mov_b32 s6, s8
576; MOVREL-NEXT:    s_mov_b32 s7, s9
577; MOVREL-NEXT:    v_mov_b32_e32 v5, s4
578; MOVREL-NEXT:    v_mov_b32_e32 v6, s5
579; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
580; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
581; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
582; MOVREL-NEXT:    s_mov_b32 s8, s10
583; MOVREL-NEXT:    s_mov_b32 s9, s11
584; MOVREL-NEXT:    v_mov_b32_e32 v7, s6
585; MOVREL-NEXT:    v_mov_b32_e32 v8, s7
586; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
587; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
588; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
589; MOVREL-NEXT:    s_mov_b32 s10, s12
590; MOVREL-NEXT:    s_mov_b32 s11, s13
591; MOVREL-NEXT:    v_mov_b32_e32 v9, s8
592; MOVREL-NEXT:    v_mov_b32_e32 v10, s9
593; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
594; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
595; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
596; MOVREL-NEXT:    v_mov_b32_e32 v11, s10
597; MOVREL-NEXT:    v_mov_b32_e32 v12, s11
598; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
599; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
600; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
601; MOVREL-NEXT:    v_mov_b32_e32 v13, s14
602; MOVREL-NEXT:    v_mov_b32_e32 v14, s15
603; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
604; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
605; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
606; MOVREL-NEXT:    v_mov_b32_e32 v15, s16
607; MOVREL-NEXT:    v_mov_b32_e32 v16, s17
608; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
609; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
610; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
611; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
612; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
613; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
614; MOVREL-NEXT:    s_endpgm
615;
616; GFX10-LABEL: dyn_extract_v8i64_s_v:
617; GFX10:       ; %bb.0: ; %entry
618; GFX10-NEXT:    s_mov_b32 s0, s2
619; GFX10-NEXT:    s_mov_b32 s2, s4
620; GFX10-NEXT:    s_mov_b32 s19, s5
621; GFX10-NEXT:    v_mov_b32_e32 v1, s2
622; GFX10-NEXT:    v_mov_b32_e32 v2, s19
623; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
624; GFX10-NEXT:    s_mov_b32 s1, s3
625; GFX10-NEXT:    s_mov_b32 s4, s6
626; GFX10-NEXT:    s_mov_b32 s5, s7
627; GFX10-NEXT:    s_mov_b32 s6, s8
628; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
629; GFX10-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
630; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
631; GFX10-NEXT:    s_mov_b32 s7, s9
632; GFX10-NEXT:    s_mov_b32 s8, s10
633; GFX10-NEXT:    s_mov_b32 s9, s11
634; GFX10-NEXT:    s_mov_b32 s10, s12
635; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
636; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
637; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
638; GFX10-NEXT:    s_mov_b32 s11, s13
639; GFX10-NEXT:    s_mov_b32 s12, s14
640; GFX10-NEXT:    s_mov_b32 s13, s15
641; GFX10-NEXT:    s_mov_b32 s14, s16
642; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
643; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
644; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
645; GFX10-NEXT:    s_mov_b32 s15, s17
646; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
647; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
648; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
649; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
650; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
651; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
652; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
653; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
654; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
655; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s14, vcc_lo
656; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s15, vcc_lo
657; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
658; GFX10-NEXT:    s_endpgm
659;
660; GFX11-LABEL: dyn_extract_v8i64_s_v:
661; GFX11:       ; %bb.0: ; %entry
662; GFX11-NEXT:    s_mov_b32 s0, s2
663; GFX11-NEXT:    s_mov_b32 s2, s4
664; GFX11-NEXT:    s_mov_b32 s19, s5
665; GFX11-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s19
666; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
667; GFX11-NEXT:    s_mov_b32 s1, s3
668; GFX11-NEXT:    s_mov_b32 s4, s6
669; GFX11-NEXT:    s_mov_b32 s5, s7
670; GFX11-NEXT:    s_mov_b32 s6, s8
671; GFX11-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
672; GFX11-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
673; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
674; GFX11-NEXT:    s_mov_b32 s7, s9
675; GFX11-NEXT:    s_mov_b32 s8, s10
676; GFX11-NEXT:    s_mov_b32 s9, s11
677; GFX11-NEXT:    s_mov_b32 s10, s12
678; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
679; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
680; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
681; GFX11-NEXT:    s_mov_b32 s11, s13
682; GFX11-NEXT:    s_mov_b32 s12, s14
683; GFX11-NEXT:    s_mov_b32 s13, s15
684; GFX11-NEXT:    s_mov_b32 s14, s16
685; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
686; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
687; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
688; GFX11-NEXT:    s_mov_b32 s15, s17
689; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
690; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
691; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
692; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
693; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
694; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
695; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
696; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
697; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
698; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s14, vcc_lo
699; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s15, vcc_lo
700; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
701; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
702; GFX11-NEXT:    s_endpgm
703entry:
704  %ext = extractelement <8 x i64> %vec, i32 %sel
705  store i64 %ext, i64 addrspace(1)* undef
706  ret void
707}
708
; Dynamic extractelement from <8 x i64> where the vector and the index are both
; plain (non-inreg) arguments; the selected i64 is the return value.
; All expectations below are a compare/select chain over index values 1-7, with
; each step selecting both 32-bit halves (v0 and v1) of the result. The gfx1100
; run expects each pair of selects combined into one v_dual_cndmask_b32.
709define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
710; GCN-LABEL: dyn_extract_v8i64_v_v:
711; GCN:       ; %bb.0: ; %entry
712; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
713; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
714; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
715; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
716; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
717; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
718; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
719; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
720; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
721; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
722; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
723; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
724; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
725; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
726; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
727; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
728; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
729; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
730; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
731; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
732; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
733; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
734; GCN-NEXT:    s_setpc_b64 s[30:31]
735;
736; GFX10-LABEL: dyn_extract_v8i64_v_v:
737; GFX10:       ; %bb.0: ; %entry
738; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
739; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
740; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
741; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
742; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
743; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
744; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
745; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
746; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
747; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
748; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
749; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
750; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
751; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
752; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
753; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
754; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
755; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
756; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
757; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
758; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
759; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
760; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
761; GFX10-NEXT:    s_setpc_b64 s[30:31]
762;
763; GFX11-LABEL: dyn_extract_v8i64_v_v:
764; GFX11:       ; %bb.0: ; %entry
765; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
766; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
767; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
768; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
769; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
770; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
771; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
772; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
773; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
774; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
775; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
776; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
777; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
778; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
779; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
780; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
781; GFX11-NEXT:    s_setpc_b64 s[30:31]
782entry:
783  %ext = extractelement <8 x i64> %vec, i32 %sel
784  ret i64 %ext
785}
786
; amdgpu_ps variant: <8 x i64> vector in plain arguments, index passed inreg.
; The extracted i64 is stored through an undef addrspace(1) pointer so the
; extract has a use.
; Expected code doubles the index (s_lshl_b32 by 1) and reads the two 32-bit
; halves with indexed moves: s_set_gpr_idx_on/off around v_mov_b32 on gfx900,
; v_movrels_b32 driven by m0 on the other three targets.
787define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
788; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
789; GPRIDX:       ; %bb.0: ; %entry
790; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
791; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
792; GPRIDX-NEXT:    v_mov_b32_e32 v16, v0
793; GPRIDX-NEXT:    v_mov_b32_e32 v17, v1
794; GPRIDX-NEXT:    s_set_gpr_idx_off
795; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
796; GPRIDX-NEXT:    s_endpgm
797;
798; MOVREL-LABEL: dyn_extract_v8i64_v_s:
799; MOVREL:       ; %bb.0: ; %entry
800; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
801; MOVREL-NEXT:    v_movrels_b32_e32 v16, v0
802; MOVREL-NEXT:    v_movrels_b32_e32 v17, v1
803; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[16:17]
804; MOVREL-NEXT:    s_endpgm
805;
806; GFX10-LABEL: dyn_extract_v8i64_v_s:
807; GFX10:       ; %bb.0: ; %entry
808; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
809; GFX10-NEXT:    v_movrels_b32_e32 v16, v0
810; GFX10-NEXT:    v_movrels_b32_e32 v17, v1
811; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
812; GFX10-NEXT:    s_endpgm
813;
814; GFX11-LABEL: dyn_extract_v8i64_v_s:
815; GFX11:       ; %bb.0: ; %entry
816; GFX11-NEXT:    s_lshl_b32 m0, s2, 1
817; GFX11-NEXT:    v_movrels_b32_e32 v16, v0
818; GFX11-NEXT:    v_movrels_b32_e32 v17, v1
819; GFX11-NEXT:    global_store_b64 v[0:1], v[16:17], off
820; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
821; GFX11-NEXT:    s_endpgm
822entry:
823  %ext = extractelement <8 x i64> %vec, i32 %sel
824  store i64 %ext, i64 addrspace(1)* undef
825  ret void
826}
827
; amdgpu_ps variant with both the <8 x i64> vector and the index passed inreg.
; The extracted i64 is stored through an undef addrspace(1) pointer.
; Every target expects the same shape: the incoming s2..s17 are copied down to
; s0..s15, the index (s18) is moved into m0, a single s_movrels_b64 picks the
; 64-bit element, and the result is moved to v0/v1 for the store.
828define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
829; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
830; GPRIDX:       ; %bb.0: ; %entry
831; GPRIDX-NEXT:    s_mov_b32 s0, s2
832; GPRIDX-NEXT:    s_mov_b32 s1, s3
833; GPRIDX-NEXT:    s_mov_b32 m0, s18
834; GPRIDX-NEXT:    s_mov_b32 s2, s4
835; GPRIDX-NEXT:    s_mov_b32 s3, s5
836; GPRIDX-NEXT:    s_mov_b32 s4, s6
837; GPRIDX-NEXT:    s_mov_b32 s5, s7
838; GPRIDX-NEXT:    s_mov_b32 s6, s8
839; GPRIDX-NEXT:    s_mov_b32 s7, s9
840; GPRIDX-NEXT:    s_mov_b32 s8, s10
841; GPRIDX-NEXT:    s_mov_b32 s9, s11
842; GPRIDX-NEXT:    s_mov_b32 s10, s12
843; GPRIDX-NEXT:    s_mov_b32 s11, s13
844; GPRIDX-NEXT:    s_mov_b32 s12, s14
845; GPRIDX-NEXT:    s_mov_b32 s13, s15
846; GPRIDX-NEXT:    s_mov_b32 s14, s16
847; GPRIDX-NEXT:    s_mov_b32 s15, s17
848; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
849; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
850; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
851; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
852; GPRIDX-NEXT:    s_endpgm
853;
854; MOVREL-LABEL: dyn_extract_v8i64_s_s:
855; MOVREL:       ; %bb.0: ; %entry
856; MOVREL-NEXT:    s_mov_b32 s0, s2
857; MOVREL-NEXT:    s_mov_b32 s1, s3
858; MOVREL-NEXT:    s_mov_b32 m0, s18
859; MOVREL-NEXT:    s_mov_b32 s2, s4
860; MOVREL-NEXT:    s_mov_b32 s3, s5
861; MOVREL-NEXT:    s_mov_b32 s4, s6
862; MOVREL-NEXT:    s_mov_b32 s5, s7
863; MOVREL-NEXT:    s_mov_b32 s6, s8
864; MOVREL-NEXT:    s_mov_b32 s7, s9
865; MOVREL-NEXT:    s_mov_b32 s8, s10
866; MOVREL-NEXT:    s_mov_b32 s9, s11
867; MOVREL-NEXT:    s_mov_b32 s10, s12
868; MOVREL-NEXT:    s_mov_b32 s11, s13
869; MOVREL-NEXT:    s_mov_b32 s12, s14
870; MOVREL-NEXT:    s_mov_b32 s13, s15
871; MOVREL-NEXT:    s_mov_b32 s14, s16
872; MOVREL-NEXT:    s_mov_b32 s15, s17
873; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
874; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
875; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
876; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
877; MOVREL-NEXT:    s_endpgm
878;
879; GFX10-LABEL: dyn_extract_v8i64_s_s:
880; GFX10:       ; %bb.0: ; %entry
881; GFX10-NEXT:    s_mov_b32 s0, s2
882; GFX10-NEXT:    s_mov_b32 s1, s3
883; GFX10-NEXT:    s_mov_b32 m0, s18
884; GFX10-NEXT:    s_mov_b32 s2, s4
885; GFX10-NEXT:    s_mov_b32 s3, s5
886; GFX10-NEXT:    s_mov_b32 s4, s6
887; GFX10-NEXT:    s_mov_b32 s5, s7
888; GFX10-NEXT:    s_mov_b32 s6, s8
889; GFX10-NEXT:    s_mov_b32 s7, s9
890; GFX10-NEXT:    s_mov_b32 s8, s10
891; GFX10-NEXT:    s_mov_b32 s9, s11
892; GFX10-NEXT:    s_mov_b32 s10, s12
893; GFX10-NEXT:    s_mov_b32 s11, s13
894; GFX10-NEXT:    s_mov_b32 s12, s14
895; GFX10-NEXT:    s_mov_b32 s13, s15
896; GFX10-NEXT:    s_mov_b32 s14, s16
897; GFX10-NEXT:    s_mov_b32 s15, s17
898; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
899; GFX10-NEXT:    v_mov_b32_e32 v0, s0
900; GFX10-NEXT:    v_mov_b32_e32 v1, s1
901; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
902; GFX10-NEXT:    s_endpgm
903;
904; GFX11-LABEL: dyn_extract_v8i64_s_s:
905; GFX11:       ; %bb.0: ; %entry
906; GFX11-NEXT:    s_mov_b32 s0, s2
907; GFX11-NEXT:    s_mov_b32 s1, s3
908; GFX11-NEXT:    s_mov_b32 m0, s18
909; GFX11-NEXT:    s_mov_b32 s2, s4
910; GFX11-NEXT:    s_mov_b32 s3, s5
911; GFX11-NEXT:    s_mov_b32 s4, s6
912; GFX11-NEXT:    s_mov_b32 s5, s7
913; GFX11-NEXT:    s_mov_b32 s6, s8
914; GFX11-NEXT:    s_mov_b32 s7, s9
915; GFX11-NEXT:    s_mov_b32 s8, s10
916; GFX11-NEXT:    s_mov_b32 s9, s11
917; GFX11-NEXT:    s_mov_b32 s10, s12
918; GFX11-NEXT:    s_mov_b32 s11, s13
919; GFX11-NEXT:    s_mov_b32 s12, s14
920; GFX11-NEXT:    s_mov_b32 s13, s15
921; GFX11-NEXT:    s_mov_b32 s14, s16
922; GFX11-NEXT:    s_mov_b32 s15, s17
923; GFX11-NEXT:    s_movrels_b64 s[0:1], s[0:1]
924; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
925; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
926; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
927; GFX11-NEXT:    s_endpgm
928entry:
929  %ext = extractelement <8 x i64> %vec, i32 %sel
930  store i64 %ext, i64 addrspace(1)* undef
931  ret void
932}
933
; Dynamic extractelement from an inreg <8 x float> where the index is the inreg
; argument plus the constant 3.
; The gfx900 expectation keeps the add (s_add_i32 s10, s10, 3) and resolves the
; element with an s_cmp_eq_u32 / s_cselect_b32 chain over values 1-7.
; The other expectations put the unmodified index in m0 and issue a single
; s_movrels_b32 whose source operand is s3 rather than s0, so no add appears.
934define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
935; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
936; GPRIDX:       ; %bb.0: ; %entry
937; GPRIDX-NEXT:    s_add_i32 s10, s10, 3
938; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
939; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
940; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
941; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
942; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
943; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
944; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
945; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
946; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
947; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
948; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
949; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
950; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
951; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
952; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
953; GPRIDX-NEXT:    ; return to shader part epilog
954;
955; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
956; MOVREL:       ; %bb.0: ; %entry
957; MOVREL-NEXT:    s_mov_b32 s0, s2
958; MOVREL-NEXT:    s_mov_b32 s1, s3
959; MOVREL-NEXT:    s_mov_b32 s3, s5
960; MOVREL-NEXT:    s_mov_b32 m0, s10
961; MOVREL-NEXT:    s_mov_b32 s2, s4
962; MOVREL-NEXT:    s_mov_b32 s4, s6
963; MOVREL-NEXT:    s_mov_b32 s5, s7
964; MOVREL-NEXT:    s_mov_b32 s6, s8
965; MOVREL-NEXT:    s_mov_b32 s7, s9
966; MOVREL-NEXT:    s_movrels_b32 s0, s3
967; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
968; MOVREL-NEXT:    ; return to shader part epilog
969;
970; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s_offset3:
971; GFX10PLUS:       ; %bb.0: ; %entry
972; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
973; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
974; GFX10PLUS-NEXT:    s_mov_b32 m0, s10
975; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
976; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
977; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
978; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
979; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
980; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
981; GFX10PLUS-NEXT:    s_movrels_b32 s0, s3
982; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
983; GFX10PLUS-NEXT:    ; return to shader part epilog
984entry:
985  %add = add i32 %sel, 3
986  %ext = extractelement <8 x float> %vec, i32 %add
987  ret float %ext
988}
989
; Dynamic extractelement from <8 x float> with vector and index both in plain
; arguments; the index used is %sel + 3.
; All expectations perform the +3 with a vector add on v8 and then run a
; v_cmp_eq_u32 / v_cndmask_b32 chain over values 1-7. Only the add differs per
; target: v_add_u32_e32 on gfx900, v_add_u32_e32 with an explicit vcc operand on
; fiji, and v_add_nc_u32_e32 on gfx1010/gfx1100.
990define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
991; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
992; GPRIDX:       ; %bb.0: ; %entry
993; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
994; GPRIDX-NEXT:    v_add_u32_e32 v8, 3, v8
995; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
996; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
997; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
998; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
999; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
1000; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1001; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
1002; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1003; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
1004; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
1005; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
1006; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1007; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
1008; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
1009; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
1010;
1011; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
1012; MOVREL:       ; %bb.0: ; %entry
1013; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1014; MOVREL-NEXT:    v_add_u32_e32 v8, vcc, 3, v8
1015; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
1016; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1017; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
1018; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1019; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
1020; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1021; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
1022; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1023; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
1024; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
1025; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
1026; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1027; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
1028; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
1029; MOVREL-NEXT:    s_setpc_b64 s[30:31]
1030;
1031; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v_offset3:
1032; GFX10PLUS:       ; %bb.0: ; %entry
1033; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
1035; GFX10PLUS-NEXT:    v_add_nc_u32_e32 v8, 3, v8
1036; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
1037; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1038; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
1039; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1040; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
1041; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1042; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
1043; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1044; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
1045; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1046; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
1047; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1048; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
1049; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
1050; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1051entry:
1052  %add = add i32 %sel, 3
1053  %ext = extractelement <8 x float> %vec, i32 %add
1054  ret float %ext
1055}
1056
; Dynamic extractelement from an inreg <8 x double> with index %sel + 1.
; Expected code on every target: s2..s17 copied down to s0..s15, the unmodified
; index (s18) moved into m0, and one s_movrels_b64 into s[0:1] whose source is
; s[2:3] instead of s[0:1]; no separate add instruction is expected.
1057define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) {
1058; GCN-LABEL: dyn_extract_v8f64_s_s_offset1:
1059; GCN:       ; %bb.0: ; %entry
1060; GCN-NEXT:    s_mov_b32 s0, s2
1061; GCN-NEXT:    s_mov_b32 s1, s3
1062; GCN-NEXT:    s_mov_b32 s2, s4
1063; GCN-NEXT:    s_mov_b32 s3, s5
1064; GCN-NEXT:    s_mov_b32 m0, s18
1065; GCN-NEXT:    s_mov_b32 s4, s6
1066; GCN-NEXT:    s_mov_b32 s5, s7
1067; GCN-NEXT:    s_mov_b32 s6, s8
1068; GCN-NEXT:    s_mov_b32 s7, s9
1069; GCN-NEXT:    s_mov_b32 s8, s10
1070; GCN-NEXT:    s_mov_b32 s9, s11
1071; GCN-NEXT:    s_mov_b32 s10, s12
1072; GCN-NEXT:    s_mov_b32 s11, s13
1073; GCN-NEXT:    s_mov_b32 s12, s14
1074; GCN-NEXT:    s_mov_b32 s13, s15
1075; GCN-NEXT:    s_mov_b32 s14, s16
1076; GCN-NEXT:    s_mov_b32 s15, s17
1077; GCN-NEXT:    s_movrels_b64 s[0:1], s[2:3]
1078; GCN-NEXT:    ; return to shader part epilog
1079;
1080; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset1:
1081; GFX10PLUS:       ; %bb.0: ; %entry
1082; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1083; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1084; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1085; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1086; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1087; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1088; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1089; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1090; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1091; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1092; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1093; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1094; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1095; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1096; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1097; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1098; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1099; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[2:3]
1100; GFX10PLUS-NEXT:    ; return to shader part epilog
1101entry:
1102  %add = add i32 %sel, 1
1103  %ext = extractelement <8 x double> %vec, i32 %add
1104  ret double %ext
1105}
1106
; Dynamic extractelement from an inreg <8 x double> with index %sel + 2.
; Expected code on every target: s2..s17 copied down to s0..s15, the unmodified
; index (s18) moved into m0, and one s_movrels_b64 into s[0:1] whose source is
; s[4:5]; no separate add instruction is expected.
1107define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) {
1108; GCN-LABEL: dyn_extract_v8f64_s_s_offset2:
1109; GCN:       ; %bb.0: ; %entry
1110; GCN-NEXT:    s_mov_b32 s0, s2
1111; GCN-NEXT:    s_mov_b32 s1, s3
1112; GCN-NEXT:    s_mov_b32 s2, s4
1113; GCN-NEXT:    s_mov_b32 s3, s5
1114; GCN-NEXT:    s_mov_b32 s4, s6
1115; GCN-NEXT:    s_mov_b32 s5, s7
1116; GCN-NEXT:    s_mov_b32 m0, s18
1117; GCN-NEXT:    s_mov_b32 s6, s8
1118; GCN-NEXT:    s_mov_b32 s7, s9
1119; GCN-NEXT:    s_mov_b32 s8, s10
1120; GCN-NEXT:    s_mov_b32 s9, s11
1121; GCN-NEXT:    s_mov_b32 s10, s12
1122; GCN-NEXT:    s_mov_b32 s11, s13
1123; GCN-NEXT:    s_mov_b32 s12, s14
1124; GCN-NEXT:    s_mov_b32 s13, s15
1125; GCN-NEXT:    s_mov_b32 s14, s16
1126; GCN-NEXT:    s_mov_b32 s15, s17
1127; GCN-NEXT:    s_movrels_b64 s[0:1], s[4:5]
1128; GCN-NEXT:    ; return to shader part epilog
1129;
1130; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset2:
1131; GFX10PLUS:       ; %bb.0: ; %entry
1132; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1133; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1134; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1135; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1136; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1137; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1138; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1139; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1140; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1141; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1142; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1143; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1144; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1145; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1146; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1147; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1148; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1149; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[4:5]
1150; GFX10PLUS-NEXT:    ; return to shader part epilog
1151entry:
1152  %add = add i32 %sel, 2
1153  %ext = extractelement <8 x double> %vec, i32 %add
1154  ret double %ext
1155}
1156
; Dynamic extractelement from an inreg <8 x double> with index %sel + 3.
; Expected code on every target: s2..s17 copied down to s0..s15, the unmodified
; index (s18) moved into m0, and one s_movrels_b64 into s[0:1] whose source is
; s[6:7]; no separate add instruction is expected.
1157define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) {
1158; GCN-LABEL: dyn_extract_v8f64_s_s_offset3:
1159; GCN:       ; %bb.0: ; %entry
1160; GCN-NEXT:    s_mov_b32 s0, s2
1161; GCN-NEXT:    s_mov_b32 s1, s3
1162; GCN-NEXT:    s_mov_b32 s2, s4
1163; GCN-NEXT:    s_mov_b32 s3, s5
1164; GCN-NEXT:    s_mov_b32 s4, s6
1165; GCN-NEXT:    s_mov_b32 s5, s7
1166; GCN-NEXT:    s_mov_b32 s6, s8
1167; GCN-NEXT:    s_mov_b32 s7, s9
1168; GCN-NEXT:    s_mov_b32 m0, s18
1169; GCN-NEXT:    s_mov_b32 s8, s10
1170; GCN-NEXT:    s_mov_b32 s9, s11
1171; GCN-NEXT:    s_mov_b32 s10, s12
1172; GCN-NEXT:    s_mov_b32 s11, s13
1173; GCN-NEXT:    s_mov_b32 s12, s14
1174; GCN-NEXT:    s_mov_b32 s13, s15
1175; GCN-NEXT:    s_mov_b32 s14, s16
1176; GCN-NEXT:    s_mov_b32 s15, s17
1177; GCN-NEXT:    s_movrels_b64 s[0:1], s[6:7]
1178; GCN-NEXT:    ; return to shader part epilog
1179;
1180; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset3:
1181; GFX10PLUS:       ; %bb.0: ; %entry
1182; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1183; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1184; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1185; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1186; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1187; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1188; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1189; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1190; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1191; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1192; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1193; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1194; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1195; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1196; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1197; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1198; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1199; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[6:7]
1200; GFX10PLUS-NEXT:    ; return to shader part epilog
1201entry:
1202  %add = add i32 %sel, 3
1203  %ext = extractelement <8 x double> %vec, i32 %add
1204  ret double %ext
1205}
1206
; Dynamic extractelement from an inreg <8 x double> with index %sel + 4.
; Expected code on every target: s2..s17 copied down to s0..s15, the unmodified
; index (s18) moved into m0, and one s_movrels_b64 into s[0:1] whose source is
; s[8:9]; no separate add instruction is expected.
1207define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) {
1208; GCN-LABEL: dyn_extract_v8f64_s_s_offset4:
1209; GCN:       ; %bb.0: ; %entry
1210; GCN-NEXT:    s_mov_b32 s0, s2
1211; GCN-NEXT:    s_mov_b32 s1, s3
1212; GCN-NEXT:    s_mov_b32 s2, s4
1213; GCN-NEXT:    s_mov_b32 s3, s5
1214; GCN-NEXT:    s_mov_b32 s4, s6
1215; GCN-NEXT:    s_mov_b32 s5, s7
1216; GCN-NEXT:    s_mov_b32 s6, s8
1217; GCN-NEXT:    s_mov_b32 s7, s9
1218; GCN-NEXT:    s_mov_b32 s8, s10
1219; GCN-NEXT:    s_mov_b32 s9, s11
1220; GCN-NEXT:    s_mov_b32 m0, s18
1221; GCN-NEXT:    s_mov_b32 s10, s12
1222; GCN-NEXT:    s_mov_b32 s11, s13
1223; GCN-NEXT:    s_mov_b32 s12, s14
1224; GCN-NEXT:    s_mov_b32 s13, s15
1225; GCN-NEXT:    s_mov_b32 s14, s16
1226; GCN-NEXT:    s_mov_b32 s15, s17
1227; GCN-NEXT:    s_movrels_b64 s[0:1], s[8:9]
1228; GCN-NEXT:    ; return to shader part epilog
1229;
1230; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset4:
1231; GFX10PLUS:       ; %bb.0: ; %entry
1232; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1233; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1234; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1235; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1236; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1237; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1238; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1239; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1240; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1241; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1242; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1243; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1244; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1245; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1246; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1247; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1248; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1249; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[8:9]
1250; GFX10PLUS-NEXT:    ; return to shader part epilog
1251entry:
1252  %add = add i32 %sel, 4
1253  %ext = extractelement <8 x double> %vec, i32 %add
1254  ret double %ext
1255}
1256
; Dynamic extractelement from an inreg <8 x double> with index %sel + 5.
; Expected code on every target: s2..s17 copied down to s0..s15, the unmodified
; index (s18) moved into m0, and one s_movrels_b64 into s[0:1] whose source is
; s[10:11]; no separate add instruction is expected.
1257define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) {
1258; GCN-LABEL: dyn_extract_v8f64_s_s_offset5:
1259; GCN:       ; %bb.0: ; %entry
1260; GCN-NEXT:    s_mov_b32 s0, s2
1261; GCN-NEXT:    s_mov_b32 s1, s3
1262; GCN-NEXT:    s_mov_b32 s2, s4
1263; GCN-NEXT:    s_mov_b32 s3, s5
1264; GCN-NEXT:    s_mov_b32 s4, s6
1265; GCN-NEXT:    s_mov_b32 s5, s7
1266; GCN-NEXT:    s_mov_b32 s6, s8
1267; GCN-NEXT:    s_mov_b32 s7, s9
1268; GCN-NEXT:    s_mov_b32 s8, s10
1269; GCN-NEXT:    s_mov_b32 s9, s11
1270; GCN-NEXT:    s_mov_b32 s10, s12
1271; GCN-NEXT:    s_mov_b32 s11, s13
1272; GCN-NEXT:    s_mov_b32 m0, s18
1273; GCN-NEXT:    s_mov_b32 s12, s14
1274; GCN-NEXT:    s_mov_b32 s13, s15
1275; GCN-NEXT:    s_mov_b32 s14, s16
1276; GCN-NEXT:    s_mov_b32 s15, s17
1277; GCN-NEXT:    s_movrels_b64 s[0:1], s[10:11]
1278; GCN-NEXT:    ; return to shader part epilog
1279;
1280; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset5:
1281; GFX10PLUS:       ; %bb.0: ; %entry
1282; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1283; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1284; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1285; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1286; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1287; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1288; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1289; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1290; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1291; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1292; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1293; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1294; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1295; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1296; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1297; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1298; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1299; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[10:11]
1300; GFX10PLUS-NEXT:    ; return to shader part epilog
1301entry:
1302  %add = add i32 %sel, 5
1303  %ext = extractelement <8 x double> %vec, i32 %add
1304  ret double %ext
1305}
1306
; Dynamic extractelement from an inreg <8 x double> with index %sel + 6.
; Expected code on every target: s2..s17 copied down to s0..s15, the unmodified
; index (s18) moved into m0, and one s_movrels_b64 into s[0:1] whose source is
; s[12:13]; no separate add instruction is expected.
1307define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) {
1308; GCN-LABEL: dyn_extract_v8f64_s_s_offset6:
1309; GCN:       ; %bb.0: ; %entry
1310; GCN-NEXT:    s_mov_b32 s0, s2
1311; GCN-NEXT:    s_mov_b32 s1, s3
1312; GCN-NEXT:    s_mov_b32 s2, s4
1313; GCN-NEXT:    s_mov_b32 s3, s5
1314; GCN-NEXT:    s_mov_b32 s4, s6
1315; GCN-NEXT:    s_mov_b32 s5, s7
1316; GCN-NEXT:    s_mov_b32 s6, s8
1317; GCN-NEXT:    s_mov_b32 s7, s9
1318; GCN-NEXT:    s_mov_b32 s8, s10
1319; GCN-NEXT:    s_mov_b32 s9, s11
1320; GCN-NEXT:    s_mov_b32 s10, s12
1321; GCN-NEXT:    s_mov_b32 s11, s13
1322; GCN-NEXT:    s_mov_b32 s12, s14
1323; GCN-NEXT:    s_mov_b32 s13, s15
1324; GCN-NEXT:    s_mov_b32 m0, s18
1325; GCN-NEXT:    s_mov_b32 s14, s16
1326; GCN-NEXT:    s_mov_b32 s15, s17
1327; GCN-NEXT:    s_movrels_b64 s[0:1], s[12:13]
1328; GCN-NEXT:    ; return to shader part epilog
1329;
1330; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset6:
1331; GFX10PLUS:       ; %bb.0: ; %entry
1332; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1333; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1334; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1335; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1336; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1337; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1338; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1339; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1340; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1341; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1342; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1343; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1344; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1345; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1346; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1347; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1348; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1349; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[12:13]
1350; GFX10PLUS-NEXT:    ; return to shader part epilog
1351entry:
1352  %add = add i32 %sel, 6
1353  %ext = extractelement <8 x double> %vec, i32 %add
1354  ret double %ext
1355}
1356
; Dynamic extractelement from an inreg <8 x double> with index %sel + 7.
; Expected code: s2..s17 copied down to s0..s15, the unmodified index (s18)
; moved into m0 as the last copy, and one s_movrels_b64 into s[0:1] whose
; source is s[14:15]; no separate add instruction is expected.
; The gfx900 expectation additionally has an s_nop 0 between the m0 write and
; the s_movrels_b64, which is why this function has per-target check blocks.
; NOTE(review): presumably the nop is there because the m0 write directly
; precedes its use here - confirm against the hazard handling in the backend.
1357define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) {
1358; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7:
1359; GPRIDX:       ; %bb.0: ; %entry
1360; GPRIDX-NEXT:    s_mov_b32 s0, s2
1361; GPRIDX-NEXT:    s_mov_b32 s1, s3
1362; GPRIDX-NEXT:    s_mov_b32 s2, s4
1363; GPRIDX-NEXT:    s_mov_b32 s3, s5
1364; GPRIDX-NEXT:    s_mov_b32 s4, s6
1365; GPRIDX-NEXT:    s_mov_b32 s5, s7
1366; GPRIDX-NEXT:    s_mov_b32 s6, s8
1367; GPRIDX-NEXT:    s_mov_b32 s7, s9
1368; GPRIDX-NEXT:    s_mov_b32 s8, s10
1369; GPRIDX-NEXT:    s_mov_b32 s9, s11
1370; GPRIDX-NEXT:    s_mov_b32 s10, s12
1371; GPRIDX-NEXT:    s_mov_b32 s11, s13
1372; GPRIDX-NEXT:    s_mov_b32 s12, s14
1373; GPRIDX-NEXT:    s_mov_b32 s13, s15
1374; GPRIDX-NEXT:    s_mov_b32 s14, s16
1375; GPRIDX-NEXT:    s_mov_b32 s15, s17
1376; GPRIDX-NEXT:    s_mov_b32 m0, s18
1377; GPRIDX-NEXT:    s_nop 0
1378; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1379; GPRIDX-NEXT:    ; return to shader part epilog
1380;
1381; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
1382; MOVREL:       ; %bb.0: ; %entry
1383; MOVREL-NEXT:    s_mov_b32 s0, s2
1384; MOVREL-NEXT:    s_mov_b32 s1, s3
1385; MOVREL-NEXT:    s_mov_b32 s2, s4
1386; MOVREL-NEXT:    s_mov_b32 s3, s5
1387; MOVREL-NEXT:    s_mov_b32 s4, s6
1388; MOVREL-NEXT:    s_mov_b32 s5, s7
1389; MOVREL-NEXT:    s_mov_b32 s6, s8
1390; MOVREL-NEXT:    s_mov_b32 s7, s9
1391; MOVREL-NEXT:    s_mov_b32 s8, s10
1392; MOVREL-NEXT:    s_mov_b32 s9, s11
1393; MOVREL-NEXT:    s_mov_b32 s10, s12
1394; MOVREL-NEXT:    s_mov_b32 s11, s13
1395; MOVREL-NEXT:    s_mov_b32 s12, s14
1396; MOVREL-NEXT:    s_mov_b32 s13, s15
1397; MOVREL-NEXT:    s_mov_b32 s14, s16
1398; MOVREL-NEXT:    s_mov_b32 s15, s17
1399; MOVREL-NEXT:    s_mov_b32 m0, s18
1400; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1401; MOVREL-NEXT:    ; return to shader part epilog
1402;
1403; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset7:
1404; GFX10PLUS:       ; %bb.0: ; %entry
1405; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1406; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1407; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1408; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1409; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1410; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1411; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1412; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1413; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1414; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1415; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1416; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1417; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1418; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1419; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1420; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1421; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1422; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1423; GFX10PLUS-NEXT:    ; return to shader part epilog
1424entry:
1425  %add = add i32 %sel, 7
1426  %ext = extractelement <8 x double> %vec, i32 %add
1427  ret double %ext
1428}
1429
; Dynamic extractelement from an inreg <8 x double> with index %sel + (-1).
; Unlike the positive-offset variants, the expected code keeps the arithmetic:
; m0 is written by s_add_i32 m0, s18, -1 and the s_movrels_b64 source stays at
; s[0:1]. The s2..s17 -> s0..s15 copies are otherwise the same on every target.
1430define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) {
1431; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1432; GCN:       ; %bb.0: ; %entry
1433; GCN-NEXT:    s_mov_b32 s0, s2
1434; GCN-NEXT:    s_mov_b32 s1, s3
1435; GCN-NEXT:    s_add_i32 m0, s18, -1
1436; GCN-NEXT:    s_mov_b32 s2, s4
1437; GCN-NEXT:    s_mov_b32 s3, s5
1438; GCN-NEXT:    s_mov_b32 s4, s6
1439; GCN-NEXT:    s_mov_b32 s5, s7
1440; GCN-NEXT:    s_mov_b32 s6, s8
1441; GCN-NEXT:    s_mov_b32 s7, s9
1442; GCN-NEXT:    s_mov_b32 s8, s10
1443; GCN-NEXT:    s_mov_b32 s9, s11
1444; GCN-NEXT:    s_mov_b32 s10, s12
1445; GCN-NEXT:    s_mov_b32 s11, s13
1446; GCN-NEXT:    s_mov_b32 s12, s14
1447; GCN-NEXT:    s_mov_b32 s13, s15
1448; GCN-NEXT:    s_mov_b32 s14, s16
1449; GCN-NEXT:    s_mov_b32 s15, s17
1450; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1451; GCN-NEXT:    ; return to shader part epilog
1452;
1453; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1454; GFX10PLUS:       ; %bb.0: ; %entry
1455; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1456; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1457; GFX10PLUS-NEXT:    s_add_i32 m0, s18, -1
1458; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1459; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1460; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1461; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1462; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1463; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1464; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1465; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1466; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1467; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1468; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1469; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1470; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1471; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1472; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1473; GFX10PLUS-NEXT:    ; return to shader part epilog
1474entry:
1475  %add = add i32 %sel, -1
1476  %ext = extractelement <8 x double> %vec, i32 %add
1477  ret double %ext
1478}
1479
; Dynamic extract from an <8 x double> vector with a constant +3 folded into
; the index. Neither argument is inreg and the default calling convention is
; used; the expected code takes the vector in v0-v15 and the selector in v16.
; Every run expects a vector add of 3 followed by a chain of compares against
; 1 through 7, each feeding conditional moves for the low and high halves of
; the result. The fiji form of the add also names vcc as an operand, and the
; gfx1100 run expects each low/high pair fused into one v_dual_cndmask_b32.
; NOTE(review) the gfx1100 expectations list the v1 half of every dual pair
; first, whereas dyn_extract_v8p1_v_v lists the v0 half first - confirm by
; regenerating the checks with utils/update_llc_test_checks.py.
1480define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
1481; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
1482; GPRIDX:       ; %bb.0: ; %entry
1483; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1484; GPRIDX-NEXT:    v_add_u32_e32 v16, 3, v16
1485; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1486; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1487; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1488; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1489; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1490; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1491; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1492; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1493; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1494; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1495; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1496; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1497; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1498; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1499; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1500; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1501; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1502; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1503; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1504; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1505; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1506; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
1507;
1508; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
1509; MOVREL:       ; %bb.0: ; %entry
1510; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1511; MOVREL-NEXT:    v_add_u32_e32 v16, vcc, 3, v16
1512; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1513; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1514; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1515; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1516; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1517; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1518; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1519; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1520; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1521; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1522; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1523; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1524; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1525; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1526; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1527; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1528; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1529; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1530; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1531; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1532; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1533; MOVREL-NEXT:    s_setpc_b64 s[30:31]
1534;
1535; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3:
1536; GFX10:       ; %bb.0: ; %entry
1537; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1538; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1539; GFX10-NEXT:    v_add_nc_u32_e32 v16, 3, v16
1540; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1541; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1542; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1543; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1544; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1545; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1546; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1547; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1548; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1549; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1550; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1551; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1552; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1553; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1554; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1555; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1556; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1557; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1558; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1559; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1560; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1561; GFX10-NEXT:    s_setpc_b64 s[30:31]
1562;
1563; GFX11-LABEL: dyn_extract_v8f64_v_v_offset3:
1564; GFX11:       ; %bb.0: ; %entry
1565; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1567; GFX11-NEXT:    v_add_nc_u32_e32 v16, 3, v16
1568; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1569; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
1570; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1571; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
1572; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1573; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
1574; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1575; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
1576; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1577; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10
1578; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1579; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12
1580; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1581; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14
1582; GFX11-NEXT:    s_setpc_b64 s[30:31]
1583entry:
1584  %add = add i32 %sel, 3
1585  %ext = extractelement <8 x double> %vec, i32 %add
1586  ret double %ext
1587}
1588
; Dynamic extract of one addrspace(3) pointer from an 8-element pointer
; vector. Neither argument is inreg; the expected code takes the elements in
; v0-v7 and the index in v8.
; All runs expect a chain of compares of the index against 1 through 7, each
; followed by one v_cndmask_b32 that folds the matching element into v0. The
; gfx1010 and gfx1100 runs share one set of expectations that uses vcc_lo and
; adds an s_waitcnt_vscnt at entry.
1589define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) {
1590; GCN-LABEL: dyn_extract_v8p3_v_v:
1591; GCN:       ; %bb.0: ; %entry
1592; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1593; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
1594; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1595; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
1596; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1597; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
1598; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1599; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
1600; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1601; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
1602; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
1603; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
1604; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1605; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
1606; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
1607; GCN-NEXT:    s_setpc_b64 s[30:31]
1608;
1609; GFX10PLUS-LABEL: dyn_extract_v8p3_v_v:
1610; GFX10PLUS:       ; %bb.0: ; %entry
1611; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1612; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
1613; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
1614; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1615; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
1616; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1617; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
1618; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1619; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
1620; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1621; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
1622; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1623; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
1624; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1625; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
1626; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
1627; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1628entry:
1629  %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
1630  ret i8 addrspace(3)* %ext
1631}
1632
; Dynamic extract of one addrspace(3) pointer from an 8-element pointer
; vector where both the vector and the index are inreg, in an amdgpu_ps
; function. The extracted pointer is stored through an undef addrspace(3)
; pointer and the function returns void.
; The expectations differ per target:
;   gfx900  - chain of s_cmp_eq_u32 / s_cselect_b32 over s2-s9, index in s10
;   fiji    - index copied to m0, s_movrels_b32, then m0 set to -1 before the
;             ds_write_b32
;   gfx1010 - index copied to m0, s_movrels_b32, ds_write_b32
;   gfx1100 - same as gfx1010 but the store is spelled ds_store_b32
1633define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) {
1634; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
1635; GPRIDX:       ; %bb.0: ; %entry
1636; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
1637; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
1638; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
1639; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
1640; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
1641; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
1642; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
1643; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
1644; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
1645; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
1646; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
1647; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
1648; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
1649; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
1650; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1651; GPRIDX-NEXT:    ds_write_b32 v0, v0
1652; GPRIDX-NEXT:    s_endpgm
1653;
1654; MOVREL-LABEL: dyn_extract_v8p3_s_s:
1655; MOVREL:       ; %bb.0: ; %entry
1656; MOVREL-NEXT:    s_mov_b32 s0, s2
1657; MOVREL-NEXT:    s_mov_b32 m0, s10
1658; MOVREL-NEXT:    s_mov_b32 s1, s3
1659; MOVREL-NEXT:    s_mov_b32 s2, s4
1660; MOVREL-NEXT:    s_mov_b32 s3, s5
1661; MOVREL-NEXT:    s_mov_b32 s4, s6
1662; MOVREL-NEXT:    s_mov_b32 s5, s7
1663; MOVREL-NEXT:    s_mov_b32 s6, s8
1664; MOVREL-NEXT:    s_mov_b32 s7, s9
1665; MOVREL-NEXT:    s_movrels_b32 s0, s0
1666; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
1667; MOVREL-NEXT:    s_mov_b32 m0, -1
1668; MOVREL-NEXT:    ds_write_b32 v0, v0
1669; MOVREL-NEXT:    s_endpgm
1670;
1671; GFX10-LABEL: dyn_extract_v8p3_s_s:
1672; GFX10:       ; %bb.0: ; %entry
1673; GFX10-NEXT:    s_mov_b32 s0, s2
1674; GFX10-NEXT:    s_mov_b32 m0, s10
1675; GFX10-NEXT:    s_mov_b32 s1, s3
1676; GFX10-NEXT:    s_mov_b32 s2, s4
1677; GFX10-NEXT:    s_mov_b32 s3, s5
1678; GFX10-NEXT:    s_mov_b32 s4, s6
1679; GFX10-NEXT:    s_mov_b32 s5, s7
1680; GFX10-NEXT:    s_mov_b32 s6, s8
1681; GFX10-NEXT:    s_mov_b32 s7, s9
1682; GFX10-NEXT:    s_movrels_b32 s0, s0
1683; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1684; GFX10-NEXT:    ds_write_b32 v0, v0
1685; GFX10-NEXT:    s_endpgm
1686;
1687; GFX11-LABEL: dyn_extract_v8p3_s_s:
1688; GFX11:       ; %bb.0: ; %entry
1689; GFX11-NEXT:    s_mov_b32 s0, s2
1690; GFX11-NEXT:    s_mov_b32 m0, s10
1691; GFX11-NEXT:    s_mov_b32 s1, s3
1692; GFX11-NEXT:    s_mov_b32 s2, s4
1693; GFX11-NEXT:    s_mov_b32 s3, s5
1694; GFX11-NEXT:    s_mov_b32 s4, s6
1695; GFX11-NEXT:    s_mov_b32 s5, s7
1696; GFX11-NEXT:    s_mov_b32 s6, s8
1697; GFX11-NEXT:    s_mov_b32 s7, s9
1698; GFX11-NEXT:    s_movrels_b32 s0, s0
1699; GFX11-NEXT:    v_mov_b32_e32 v0, s0
1700; GFX11-NEXT:    ds_store_b32 v0, v0
1701; GFX11-NEXT:    s_endpgm
1702entry:
1703  %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
  ; The store address is undef; presumably the store exists only to give the
  ; extracted value a use in a void function - confirm before changing it.
1704  store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef
1705  ret void
1706}
1707
; Dynamic extract of one addrspace(1) pointer from an 8-element pointer
; vector. Neither argument is inreg; the expected code takes the elements as
; register pairs in v0-v15 and the index in v16.
; All runs expect a chain of compares of the index against 1 through 7, each
; feeding conditional moves for both halves of the result (v0 and v1). The
; gfx1100 run expects each pair fused into a single v_dual_cndmask_b32.
1708define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) {
1709; GCN-LABEL: dyn_extract_v8p1_v_v:
1710; GCN:       ; %bb.0: ; %entry
1711; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1712; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1713; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1714; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1715; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1716; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1717; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1718; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1719; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1720; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1721; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1722; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1723; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1724; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1725; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1726; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1727; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1728; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1729; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1730; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1731; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1732; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1733; GCN-NEXT:    s_setpc_b64 s[30:31]
1734;
1735; GFX10-LABEL: dyn_extract_v8p1_v_v:
1736; GFX10:       ; %bb.0: ; %entry
1737; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1738; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1739; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1740; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1741; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1742; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1743; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1744; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1745; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1746; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1747; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1748; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1749; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1750; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1751; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1752; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1753; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1754; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1755; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1756; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1757; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1758; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1759; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1760; GFX10-NEXT:    s_setpc_b64 s[30:31]
1761;
1762; GFX11-LABEL: dyn_extract_v8p1_v_v:
1763; GFX11:       ; %bb.0: ; %entry
1764; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1765; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1766; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1767; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
1768; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1769; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
1770; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1771; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
1772; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1773; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
1774; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1775; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
1776; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1777; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
1778; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1779; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
1780; GFX11-NEXT:    s_setpc_b64 s[30:31]
1781entry:
1782  %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
1783  ret i8 addrspace(1)* %ext
1784}
1785
; Dynamic extract of one addrspace(1) pointer from an 8-element pointer
; vector where both the vector and the index are inreg, in an amdgpu_ps
; function. The extracted pointer is stored through an undef addrspace(1)
; pointer and the function returns void.
; Every run expects the vector copied down to start at s0, the index (s18)
; copied to m0, and one s_movrels_b64 to read the selected pair. Only the
; tail differs per target:
;   gfx900  - global_store_dwordx2
;   fiji    - flat_store_dwordx2
;   gfx1010 - global_store_dwordx2
;   gfx1100 - a single v_dual_mov_b32 for the copy to v0/v1, global_store_b64,
;             and an s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) before s_endpgm
1786define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) {
1787; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
1788; GPRIDX:       ; %bb.0: ; %entry
1789; GPRIDX-NEXT:    s_mov_b32 s0, s2
1790; GPRIDX-NEXT:    s_mov_b32 s1, s3
1791; GPRIDX-NEXT:    s_mov_b32 m0, s18
1792; GPRIDX-NEXT:    s_mov_b32 s2, s4
1793; GPRIDX-NEXT:    s_mov_b32 s3, s5
1794; GPRIDX-NEXT:    s_mov_b32 s4, s6
1795; GPRIDX-NEXT:    s_mov_b32 s5, s7
1796; GPRIDX-NEXT:    s_mov_b32 s6, s8
1797; GPRIDX-NEXT:    s_mov_b32 s7, s9
1798; GPRIDX-NEXT:    s_mov_b32 s8, s10
1799; GPRIDX-NEXT:    s_mov_b32 s9, s11
1800; GPRIDX-NEXT:    s_mov_b32 s10, s12
1801; GPRIDX-NEXT:    s_mov_b32 s11, s13
1802; GPRIDX-NEXT:    s_mov_b32 s12, s14
1803; GPRIDX-NEXT:    s_mov_b32 s13, s15
1804; GPRIDX-NEXT:    s_mov_b32 s14, s16
1805; GPRIDX-NEXT:    s_mov_b32 s15, s17
1806; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1807; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1808; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
1809; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1810; GPRIDX-NEXT:    s_endpgm
1811;
1812; MOVREL-LABEL: dyn_extract_v8p1_s_s:
1813; MOVREL:       ; %bb.0: ; %entry
1814; MOVREL-NEXT:    s_mov_b32 s0, s2
1815; MOVREL-NEXT:    s_mov_b32 s1, s3
1816; MOVREL-NEXT:    s_mov_b32 m0, s18
1817; MOVREL-NEXT:    s_mov_b32 s2, s4
1818; MOVREL-NEXT:    s_mov_b32 s3, s5
1819; MOVREL-NEXT:    s_mov_b32 s4, s6
1820; MOVREL-NEXT:    s_mov_b32 s5, s7
1821; MOVREL-NEXT:    s_mov_b32 s6, s8
1822; MOVREL-NEXT:    s_mov_b32 s7, s9
1823; MOVREL-NEXT:    s_mov_b32 s8, s10
1824; MOVREL-NEXT:    s_mov_b32 s9, s11
1825; MOVREL-NEXT:    s_mov_b32 s10, s12
1826; MOVREL-NEXT:    s_mov_b32 s11, s13
1827; MOVREL-NEXT:    s_mov_b32 s12, s14
1828; MOVREL-NEXT:    s_mov_b32 s13, s15
1829; MOVREL-NEXT:    s_mov_b32 s14, s16
1830; MOVREL-NEXT:    s_mov_b32 s15, s17
1831; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1832; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
1833; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
1834; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1835; MOVREL-NEXT:    s_endpgm
1836;
1837; GFX10-LABEL: dyn_extract_v8p1_s_s:
1838; GFX10:       ; %bb.0: ; %entry
1839; GFX10-NEXT:    s_mov_b32 s0, s2
1840; GFX10-NEXT:    s_mov_b32 s1, s3
1841; GFX10-NEXT:    s_mov_b32 m0, s18
1842; GFX10-NEXT:    s_mov_b32 s2, s4
1843; GFX10-NEXT:    s_mov_b32 s3, s5
1844; GFX10-NEXT:    s_mov_b32 s4, s6
1845; GFX10-NEXT:    s_mov_b32 s5, s7
1846; GFX10-NEXT:    s_mov_b32 s6, s8
1847; GFX10-NEXT:    s_mov_b32 s7, s9
1848; GFX10-NEXT:    s_mov_b32 s8, s10
1849; GFX10-NEXT:    s_mov_b32 s9, s11
1850; GFX10-NEXT:    s_mov_b32 s10, s12
1851; GFX10-NEXT:    s_mov_b32 s11, s13
1852; GFX10-NEXT:    s_mov_b32 s12, s14
1853; GFX10-NEXT:    s_mov_b32 s13, s15
1854; GFX10-NEXT:    s_mov_b32 s14, s16
1855; GFX10-NEXT:    s_mov_b32 s15, s17
1856; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1857; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1858; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1859; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1860; GFX10-NEXT:    s_endpgm
1861;
1862; GFX11-LABEL: dyn_extract_v8p1_s_s:
1863; GFX11:       ; %bb.0: ; %entry
1864; GFX11-NEXT:    s_mov_b32 s0, s2
1865; GFX11-NEXT:    s_mov_b32 s1, s3
1866; GFX11-NEXT:    s_mov_b32 m0, s18
1867; GFX11-NEXT:    s_mov_b32 s2, s4
1868; GFX11-NEXT:    s_mov_b32 s3, s5
1869; GFX11-NEXT:    s_mov_b32 s4, s6
1870; GFX11-NEXT:    s_mov_b32 s5, s7
1871; GFX11-NEXT:    s_mov_b32 s6, s8
1872; GFX11-NEXT:    s_mov_b32 s7, s9
1873; GFX11-NEXT:    s_mov_b32 s8, s10
1874; GFX11-NEXT:    s_mov_b32 s9, s11
1875; GFX11-NEXT:    s_mov_b32 s10, s12
1876; GFX11-NEXT:    s_mov_b32 s11, s13
1877; GFX11-NEXT:    s_mov_b32 s12, s14
1878; GFX11-NEXT:    s_mov_b32 s13, s15
1879; GFX11-NEXT:    s_mov_b32 s14, s16
1880; GFX11-NEXT:    s_mov_b32 s15, s17
1881; GFX11-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1882; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1883; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
1884; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1885; GFX11-NEXT:    s_endpgm
1886entry:
1887  %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
  ; The store address is undef; presumably the store exists only to give the
  ; extracted value a use in a void function - confirm before changing it.
1888  store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef
1889  ret void
1890}
1891
; Dynamic extract from a <16 x float> vector in an amdgpu_ps function where
; only the index is inreg. The expected code reads the index from s2 and
; produces the result in v0.
; The gfx900 run expects an indexed v_mov_b32 bracketed by s_set_gpr_idx_on
; (SRC0 mode) and s_set_gpr_idx_off. The fiji, gfx1010 and gfx1100 runs expect
; the index copied to m0 followed by a single v_movrels_b32.
1892define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) {
1893; GPRIDX-LABEL: dyn_extract_v16f32_v_s:
1894; GPRIDX:       ; %bb.0: ; %entry
1895; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
1896; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
1897; GPRIDX-NEXT:    s_set_gpr_idx_off
1898; GPRIDX-NEXT:    ; return to shader part epilog
1899;
1900; MOVREL-LABEL: dyn_extract_v16f32_v_s:
1901; MOVREL:       ; %bb.0: ; %entry
1902; MOVREL-NEXT:    s_mov_b32 m0, s2
1903; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
1904; MOVREL-NEXT:    ; return to shader part epilog
1905;
1906; GFX10PLUS-LABEL: dyn_extract_v16f32_v_s:
1907; GFX10PLUS:       ; %bb.0: ; %entry
1908; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
1909; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
1910; GFX10PLUS-NEXT:    ; return to shader part epilog
1911entry:
1912  %ext = extractelement <16 x float> %vec, i32 %sel
1913  ret float %ext
1914}
1915
; Dynamic extract from a <32 x float> vector in an amdgpu_ps function where
; only the index is inreg. The expectations are instruction-for-instruction
; the same as for dyn_extract_v16f32_v_s: an indexed v_mov_b32 between
; s_set_gpr_idx_on and s_set_gpr_idx_off on gfx900, and m0 plus a single
; v_movrels_b32 on fiji, gfx1010 and gfx1100.
1916define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) {
1917; GPRIDX-LABEL: dyn_extract_v32f32_v_s:
1918; GPRIDX:       ; %bb.0: ; %entry
1919; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
1920; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
1921; GPRIDX-NEXT:    s_set_gpr_idx_off
1922; GPRIDX-NEXT:    ; return to shader part epilog
1923;
1924; MOVREL-LABEL: dyn_extract_v32f32_v_s:
1925; MOVREL:       ; %bb.0: ; %entry
1926; MOVREL-NEXT:    s_mov_b32 m0, s2
1927; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
1928; MOVREL-NEXT:    ; return to shader part epilog
1929;
1930; GFX10PLUS-LABEL: dyn_extract_v32f32_v_s:
1931; GFX10PLUS:       ; %bb.0: ; %entry
1932; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
1933; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
1934; GFX10PLUS-NEXT:    ; return to shader part epilog
1935entry:
1936  %ext = extractelement <32 x float> %vec, i32 %sel
1937  ret float %ext
1938}
1939
; Dynamic extract from a <16 x double> vector in an amdgpu_ps function where
; only the index is inreg.
; Every run expects the index (s2) shifted left by one, then two 32-bit
; indexed moves based at v0 and v1 to fetch the two halves of the element,
; and finally two v_readfirstlane_b32 to place the result in s0 and s1.
; gfx900 performs the indexed moves between s_set_gpr_idx_on and
; s_set_gpr_idx_off; the other runs put the shifted index in m0 and use
; v_movrels_b32.
1940define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) {
1941; GPRIDX-LABEL: dyn_extract_v16f64_v_s:
1942; GPRIDX:       ; %bb.0: ; %entry
1943; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
1944; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
1945; GPRIDX-NEXT:    v_mov_b32_e32 v32, v0
1946; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
1947; GPRIDX-NEXT:    s_set_gpr_idx_off
1948; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v32
1949; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
1950; GPRIDX-NEXT:    ; return to shader part epilog
1951;
1952; MOVREL-LABEL: dyn_extract_v16f64_v_s:
1953; MOVREL:       ; %bb.0: ; %entry
1954; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
1955; MOVREL-NEXT:    v_movrels_b32_e32 v32, v0
1956; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
1957; MOVREL-NEXT:    v_readfirstlane_b32 s0, v32
1958; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
1959; MOVREL-NEXT:    ; return to shader part epilog
1960;
1961; GFX10PLUS-LABEL: dyn_extract_v16f64_v_s:
1962; GFX10PLUS:       ; %bb.0: ; %entry
1963; GFX10PLUS-NEXT:    s_lshl_b32 m0, s2, 1
1964; GFX10PLUS-NEXT:    v_movrels_b32_e32 v32, v0
1965; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v1
1966; GFX10PLUS-NEXT:    v_readfirstlane_b32 s0, v32
1967; GFX10PLUS-NEXT:    v_readfirstlane_b32 s1, v0
1968; GFX10PLUS-NEXT:    ; return to shader part epilog
1969entry:
1970  %ext = extractelement <16 x double> %vec, i32 %sel
1971  ret double %ext
1972}
1973
; Dynamic extract from a constant <16 x float> vector holding 1.0 through
; 16.0, indexed by an inreg selector in an amdgpu_ps function.
; All runs expect the sixteen constants materialized into s4 through s19
; (s4 = 1.0 up to s19 = 0x41800000), the selector (s2) copied to m0, one
; s_movrels_b32 based at s4, and the result copied from s0 into v0.
1974define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) {
1975; GCN-LABEL: dyn_extract_v16f32_s_s:
1976; GCN:       ; %bb.0: ; %entry
1977; GCN-NEXT:    s_mov_b32 s4, 1.0
1978; GCN-NEXT:    s_mov_b32 m0, s2
1979; GCN-NEXT:    s_mov_b32 s19, 0x41800000
1980; GCN-NEXT:    s_mov_b32 s18, 0x41700000
1981; GCN-NEXT:    s_mov_b32 s17, 0x41600000
1982; GCN-NEXT:    s_mov_b32 s16, 0x41500000
1983; GCN-NEXT:    s_mov_b32 s15, 0x41400000
1984; GCN-NEXT:    s_mov_b32 s14, 0x41300000
1985; GCN-NEXT:    s_mov_b32 s13, 0x41200000
1986; GCN-NEXT:    s_mov_b32 s12, 0x41100000
1987; GCN-NEXT:    s_mov_b32 s11, 0x41000000
1988; GCN-NEXT:    s_mov_b32 s10, 0x40e00000
1989; GCN-NEXT:    s_mov_b32 s9, 0x40c00000
1990; GCN-NEXT:    s_mov_b32 s8, 0x40a00000
1991; GCN-NEXT:    s_mov_b32 s7, 4.0
1992; GCN-NEXT:    s_mov_b32 s6, 0x40400000
1993; GCN-NEXT:    s_mov_b32 s5, 2.0
1994; GCN-NEXT:    s_movrels_b32 s0, s4
1995; GCN-NEXT:    v_mov_b32_e32 v0, s0
1996; GCN-NEXT:    ; return to shader part epilog
1997;
1998; GFX10PLUS-LABEL: dyn_extract_v16f32_s_s:
1999; GFX10PLUS:       ; %bb.0: ; %entry
2000; GFX10PLUS-NEXT:    s_mov_b32 s4, 1.0
2001; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
2002; GFX10PLUS-NEXT:    s_mov_b32 s19, 0x41800000
2003; GFX10PLUS-NEXT:    s_mov_b32 s18, 0x41700000
2004; GFX10PLUS-NEXT:    s_mov_b32 s17, 0x41600000
2005; GFX10PLUS-NEXT:    s_mov_b32 s16, 0x41500000
2006; GFX10PLUS-NEXT:    s_mov_b32 s15, 0x41400000
2007; GFX10PLUS-NEXT:    s_mov_b32 s14, 0x41300000
2008; GFX10PLUS-NEXT:    s_mov_b32 s13, 0x41200000
2009; GFX10PLUS-NEXT:    s_mov_b32 s12, 0x41100000
2010; GFX10PLUS-NEXT:    s_mov_b32 s11, 0x41000000
2011; GFX10PLUS-NEXT:    s_mov_b32 s10, 0x40e00000
2012; GFX10PLUS-NEXT:    s_mov_b32 s9, 0x40c00000
2013; GFX10PLUS-NEXT:    s_mov_b32 s8, 0x40a00000
2014; GFX10PLUS-NEXT:    s_mov_b32 s7, 4.0
2015; GFX10PLUS-NEXT:    s_mov_b32 s6, 0x40400000
2016; GFX10PLUS-NEXT:    s_mov_b32 s5, 2.0
2017; GFX10PLUS-NEXT:    s_movrels_b32 s0, s4
2018; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
2019; GFX10PLUS-NEXT:    ; return to shader part epilog
2020entry:
2021  %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
2022  ret float %ext
2023}
2024
; Dynamic extract from a constant <32 x float> vector holding 1.0 through
; 32.0, indexed by an inreg selector in an amdgpu_ps function.
; All runs expect the thirty-two constants materialized into s36 through s67
; (s36 = 1.0 up to s67 = 0x42000000), the selector (s2) copied to m0, one
; s_movrels_b32 based at s36, and the result copied from s0 into v0.
2025define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) {
2026; GCN-LABEL: dyn_extract_v32f32_s_s:
2027; GCN:       ; %bb.0: ; %entry
2028; GCN-NEXT:    s_mov_b32 s36, 1.0
2029; GCN-NEXT:    s_mov_b32 m0, s2
2030; GCN-NEXT:    s_mov_b32 s67, 0x42000000
2031; GCN-NEXT:    s_mov_b32 s66, 0x41f80000
2032; GCN-NEXT:    s_mov_b32 s65, 0x41f00000
2033; GCN-NEXT:    s_mov_b32 s64, 0x41e80000
2034; GCN-NEXT:    s_mov_b32 s63, 0x41e00000
2035; GCN-NEXT:    s_mov_b32 s62, 0x41d80000
2036; GCN-NEXT:    s_mov_b32 s61, 0x41d00000
2037; GCN-NEXT:    s_mov_b32 s60, 0x41c80000
2038; GCN-NEXT:    s_mov_b32 s59, 0x41c00000
2039; GCN-NEXT:    s_mov_b32 s58, 0x41b80000
2040; GCN-NEXT:    s_mov_b32 s57, 0x41b00000
2041; GCN-NEXT:    s_mov_b32 s56, 0x41a80000
2042; GCN-NEXT:    s_mov_b32 s55, 0x41a00000
2043; GCN-NEXT:    s_mov_b32 s54, 0x41980000
2044; GCN-NEXT:    s_mov_b32 s53, 0x41900000
2045; GCN-NEXT:    s_mov_b32 s52, 0x41880000
2046; GCN-NEXT:    s_mov_b32 s51, 0x41800000
2047; GCN-NEXT:    s_mov_b32 s50, 0x41700000
2048; GCN-NEXT:    s_mov_b32 s49, 0x41600000
2049; GCN-NEXT:    s_mov_b32 s48, 0x41500000
2050; GCN-NEXT:    s_mov_b32 s47, 0x41400000
2051; GCN-NEXT:    s_mov_b32 s46, 0x41300000
2052; GCN-NEXT:    s_mov_b32 s45, 0x41200000
2053; GCN-NEXT:    s_mov_b32 s44, 0x41100000
2054; GCN-NEXT:    s_mov_b32 s43, 0x41000000
2055; GCN-NEXT:    s_mov_b32 s42, 0x40e00000
2056; GCN-NEXT:    s_mov_b32 s41, 0x40c00000
2057; GCN-NEXT:    s_mov_b32 s40, 0x40a00000
2058; GCN-NEXT:    s_mov_b32 s39, 4.0
2059; GCN-NEXT:    s_mov_b32 s38, 0x40400000
2060; GCN-NEXT:    s_mov_b32 s37, 2.0
2061; GCN-NEXT:    s_movrels_b32 s0, s36
2062; GCN-NEXT:    v_mov_b32_e32 v0, s0
2063; GCN-NEXT:    ; return to shader part epilog
2064;
2065; GFX10PLUS-LABEL: dyn_extract_v32f32_s_s:
2066; GFX10PLUS:       ; %bb.0: ; %entry
2067; GFX10PLUS-NEXT:    s_mov_b32 s36, 1.0
2068; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
2069; GFX10PLUS-NEXT:    s_mov_b32 s67, 0x42000000
2070; GFX10PLUS-NEXT:    s_mov_b32 s66, 0x41f80000
2071; GFX10PLUS-NEXT:    s_mov_b32 s65, 0x41f00000
2072; GFX10PLUS-NEXT:    s_mov_b32 s64, 0x41e80000
2073; GFX10PLUS-NEXT:    s_mov_b32 s63, 0x41e00000
2074; GFX10PLUS-NEXT:    s_mov_b32 s62, 0x41d80000
2075; GFX10PLUS-NEXT:    s_mov_b32 s61, 0x41d00000
2076; GFX10PLUS-NEXT:    s_mov_b32 s60, 0x41c80000
2077; GFX10PLUS-NEXT:    s_mov_b32 s59, 0x41c00000
2078; GFX10PLUS-NEXT:    s_mov_b32 s58, 0x41b80000
2079; GFX10PLUS-NEXT:    s_mov_b32 s57, 0x41b00000
2080; GFX10PLUS-NEXT:    s_mov_b32 s56, 0x41a80000
2081; GFX10PLUS-NEXT:    s_mov_b32 s55, 0x41a00000
2082; GFX10PLUS-NEXT:    s_mov_b32 s54, 0x41980000
2083; GFX10PLUS-NEXT:    s_mov_b32 s53, 0x41900000
2084; GFX10PLUS-NEXT:    s_mov_b32 s52, 0x41880000
2085; GFX10PLUS-NEXT:    s_mov_b32 s51, 0x41800000
2086; GFX10PLUS-NEXT:    s_mov_b32 s50, 0x41700000
2087; GFX10PLUS-NEXT:    s_mov_b32 s49, 0x41600000
2088; GFX10PLUS-NEXT:    s_mov_b32 s48, 0x41500000
2089; GFX10PLUS-NEXT:    s_mov_b32 s47, 0x41400000
2090; GFX10PLUS-NEXT:    s_mov_b32 s46, 0x41300000
2091; GFX10PLUS-NEXT:    s_mov_b32 s45, 0x41200000
2092; GFX10PLUS-NEXT:    s_mov_b32 s44, 0x41100000
2093; GFX10PLUS-NEXT:    s_mov_b32 s43, 0x41000000
2094; GFX10PLUS-NEXT:    s_mov_b32 s42, 0x40e00000
2095; GFX10PLUS-NEXT:    s_mov_b32 s41, 0x40c00000
2096; GFX10PLUS-NEXT:    s_mov_b32 s40, 0x40a00000
2097; GFX10PLUS-NEXT:    s_mov_b32 s39, 4.0
2098; GFX10PLUS-NEXT:    s_mov_b32 s38, 0x40400000
2099; GFX10PLUS-NEXT:    s_mov_b32 s37, 2.0
2100; GFX10PLUS-NEXT:    s_movrels_b32 s0, s36
2101; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
2102; GFX10PLUS-NEXT:    ; return to shader part epilog
2103entry:
2104  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
2105  ret float %ext
2106}
2107
; Dynamic extract from a constant <16 x double> vector holding 1.0 through
; 16.0, indexed by an inreg selector in an amdgpu_ps function.
; All runs expect the constants materialized into s36 through s67 as register
; pairs. The values 1.0, 2.0 and 4.0 are written with one s_mov_b64 each. For
; the remaining elements the odd register receives a 32-bit literal and the
; even register is copied from s66, which is set to 0 first (s66 itself is
; the low half of the last element). The selector (s2) is copied to m0 and a
; single s_movrels_b64 based at s[36:37] leaves the result in s[0:1].
2108define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) {
2109; GCN-LABEL: dyn_extract_v16f64_s_s:
2110; GCN:       ; %bb.0: ; %entry
2111; GCN-NEXT:    s_mov_b32 s66, 0
2112; GCN-NEXT:    s_mov_b64 s[36:37], 1.0
2113; GCN-NEXT:    s_mov_b32 m0, s2
2114; GCN-NEXT:    s_mov_b32 s67, 0x40300000
2115; GCN-NEXT:    s_mov_b32 s65, 0x402e0000
2116; GCN-NEXT:    s_mov_b32 s64, s66
2117; GCN-NEXT:    s_mov_b32 s63, 0x402c0000
2118; GCN-NEXT:    s_mov_b32 s62, s66
2119; GCN-NEXT:    s_mov_b32 s61, 0x402a0000
2120; GCN-NEXT:    s_mov_b32 s60, s66
2121; GCN-NEXT:    s_mov_b32 s59, 0x40280000
2122; GCN-NEXT:    s_mov_b32 s58, s66
2123; GCN-NEXT:    s_mov_b32 s57, 0x40260000
2124; GCN-NEXT:    s_mov_b32 s56, s66
2125; GCN-NEXT:    s_mov_b32 s55, 0x40240000
2126; GCN-NEXT:    s_mov_b32 s54, s66
2127; GCN-NEXT:    s_mov_b32 s53, 0x40220000
2128; GCN-NEXT:    s_mov_b32 s52, s66
2129; GCN-NEXT:    s_mov_b32 s51, 0x40200000
2130; GCN-NEXT:    s_mov_b32 s50, s66
2131; GCN-NEXT:    s_mov_b32 s49, 0x401c0000
2132; GCN-NEXT:    s_mov_b32 s48, s66
2133; GCN-NEXT:    s_mov_b32 s47, 0x40180000
2134; GCN-NEXT:    s_mov_b32 s46, s66
2135; GCN-NEXT:    s_mov_b32 s45, 0x40140000
2136; GCN-NEXT:    s_mov_b32 s44, s66
2137; GCN-NEXT:    s_mov_b64 s[42:43], 4.0
2138; GCN-NEXT:    s_mov_b32 s41, 0x40080000
2139; GCN-NEXT:    s_mov_b32 s40, s66
2140; GCN-NEXT:    s_mov_b64 s[38:39], 2.0
2141; GCN-NEXT:    s_movrels_b64 s[0:1], s[36:37]
2142; GCN-NEXT:    ; return to shader part epilog
2143;
2144; GFX10PLUS-LABEL: dyn_extract_v16f64_s_s:
2145; GFX10PLUS:       ; %bb.0: ; %entry
2146; GFX10PLUS-NEXT:    s_mov_b32 s66, 0
2147; GFX10PLUS-NEXT:    s_mov_b64 s[36:37], 1.0
2148; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
2149; GFX10PLUS-NEXT:    s_mov_b32 s67, 0x40300000
2150; GFX10PLUS-NEXT:    s_mov_b32 s65, 0x402e0000
2151; GFX10PLUS-NEXT:    s_mov_b32 s64, s66
2152; GFX10PLUS-NEXT:    s_mov_b32 s63, 0x402c0000
2153; GFX10PLUS-NEXT:    s_mov_b32 s62, s66
2154; GFX10PLUS-NEXT:    s_mov_b32 s61, 0x402a0000
2155; GFX10PLUS-NEXT:    s_mov_b32 s60, s66
2156; GFX10PLUS-NEXT:    s_mov_b32 s59, 0x40280000
2157; GFX10PLUS-NEXT:    s_mov_b32 s58, s66
2158; GFX10PLUS-NEXT:    s_mov_b32 s57, 0x40260000
2159; GFX10PLUS-NEXT:    s_mov_b32 s56, s66
2160; GFX10PLUS-NEXT:    s_mov_b32 s55, 0x40240000
2161; GFX10PLUS-NEXT:    s_mov_b32 s54, s66
2162; GFX10PLUS-NEXT:    s_mov_b32 s53, 0x40220000
2163; GFX10PLUS-NEXT:    s_mov_b32 s52, s66
2164; GFX10PLUS-NEXT:    s_mov_b32 s51, 0x40200000
2165; GFX10PLUS-NEXT:    s_mov_b32 s50, s66
2166; GFX10PLUS-NEXT:    s_mov_b32 s49, 0x401c0000
2167; GFX10PLUS-NEXT:    s_mov_b32 s48, s66
2168; GFX10PLUS-NEXT:    s_mov_b32 s47, 0x40180000
2169; GFX10PLUS-NEXT:    s_mov_b32 s46, s66
2170; GFX10PLUS-NEXT:    s_mov_b32 s45, 0x40140000
2171; GFX10PLUS-NEXT:    s_mov_b32 s44, s66
2172; GFX10PLUS-NEXT:    s_mov_b64 s[42:43], 4.0
2173; GFX10PLUS-NEXT:    s_mov_b32 s41, 0x40080000
2174; GFX10PLUS-NEXT:    s_mov_b32 s40, s66
2175; GFX10PLUS-NEXT:    s_mov_b64 s[38:39], 2.0
2176; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[36:37]
2177; GFX10PLUS-NEXT:    ; return to shader part epilog
2178entry:
2179  %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
2180  ret double %ext
2181}
2182
; Dynamic extractelement from <6 x float>: the vector is passed inreg (the
; checks read it from s2..s7) and the index is a regular argument (checked in
; v0). The expected code is a v_cmp_eq_u32 / v_cndmask_b32 chain over indices
; 1..5 with element 0 as the starting value. The gfx900/fiji checks first copy
; the elements into VGPRs; the GFX10PLUS checks pass SGPRs straight to
; v_cndmask_b32.
2183define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) {
2184; GCN-LABEL: dyn_extract_v6f32_s_v:
2185; GCN:       ; %bb.0: ; %entry
2186; GCN-NEXT:    s_mov_b32 s0, s2
2187; GCN-NEXT:    s_mov_b32 s1, s3
2188; GCN-NEXT:    v_mov_b32_e32 v1, s0
2189; GCN-NEXT:    v_mov_b32_e32 v2, s1
2190; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2191; GCN-NEXT:    v_mov_b32_e32 v3, s4
2192; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2193; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2194; GCN-NEXT:    v_mov_b32_e32 v4, s5
2195; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2196; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2197; GCN-NEXT:    v_mov_b32_e32 v5, s6
2198; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2199; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2200; GCN-NEXT:    v_mov_b32_e32 v6, s7
2201; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2202; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2203; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v6, vcc
2204; GCN-NEXT:    ; return to shader part epilog
2205;
2206; GFX10PLUS-LABEL: dyn_extract_v6f32_s_v:
2207; GFX10PLUS:       ; %bb.0: ; %entry
2208; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2209; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2210; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, s1
2211; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2212; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2213; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2214; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2215; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2216; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2217; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2218; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
2219; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2220; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
2221; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2222; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2223; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2224; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, s5, vcc_lo
2225; GFX10PLUS-NEXT:    ; return to shader part epilog
2226entry:
2227  %ext = extractelement <6 x float> %vec, i32 %sel
2228  ret float %ext
2229}
2230
; Dynamic extractelement from <6 x float> in a default-calling-convention
; function: the checks read the vector from v0..v5 and the index from v6.
; Expected code is a v_cmp_eq_u32 / v_cndmask_b32 chain over indices 1..5 that
; accumulates into v0, followed by a normal s_setpc_b64 return.
2231define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) {
2232; GCN-LABEL: dyn_extract_v6f32_v_v:
2233; GCN:       ; %bb.0: ; %entry
2234; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2235; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v6
2236; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2237; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v6
2238; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2239; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v6
2240; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2241; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v6
2242; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2243; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v6
2244; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2245; GCN-NEXT:    s_setpc_b64 s[30:31]
2246;
2247; GFX10PLUS-LABEL: dyn_extract_v6f32_v_v:
2248; GFX10PLUS:       ; %bb.0: ; %entry
2249; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2250; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
2251; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
2252; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2253; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v6
2254; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2255; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v6
2256; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2257; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v6
2258; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2259; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v6
2260; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2261; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
2262entry:
2263  %ext = extractelement <6 x float> %vec, i32 %sel
2264  ret float %ext
2265}
2266
; Dynamic extractelement from <6 x float> with the vector as a regular
; argument (checked in v0..v5) and the index passed inreg (checked in s2).
; Expected code compares the scalar index against 1..5 with the VOP3 form of
; v_cmp_eq_u32 and selects with v_cndmask_b32, accumulating into v0.
2267define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) {
2268; GCN-LABEL: dyn_extract_v6f32_v_s:
2269; GCN:       ; %bb.0: ; %entry
2270; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
2271; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2272; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
2273; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2274; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
2275; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2276; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
2277; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2278; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
2279; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2280; GCN-NEXT:    ; return to shader part epilog
2281;
2282; GFX10PLUS-LABEL: dyn_extract_v6f32_v_s:
2283; GFX10PLUS:       ; %bb.0: ; %entry
2284; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
2285; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2286; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
2287; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2288; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
2289; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2290; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
2291; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2292; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
2293; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2294; GFX10PLUS-NEXT:    ; return to shader part epilog
2295entry:
2296  %ext = extractelement <6 x float> %vec, i32 %sel
2297  ret float %ext
2298}
2299
; Dynamic extractelement from <6 x float> with both the vector and the index
; passed inreg (checked in s2..s7 and s8). Expected code stays on the scalar
; unit: an s_cmp_eq_u32 / s_cselect_b32 chain over indices 1..5, with the
; selected value copied to v0 for the float return.
2300define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) {
2301; GCN-LABEL: dyn_extract_v6f32_s_s:
2302; GCN:       ; %bb.0: ; %entry
2303; GCN-NEXT:    s_cmp_eq_u32 s8, 1
2304; GCN-NEXT:    s_cselect_b32 s0, s3, s2
2305; GCN-NEXT:    s_cmp_eq_u32 s8, 2
2306; GCN-NEXT:    s_cselect_b32 s0, s4, s0
2307; GCN-NEXT:    s_cmp_eq_u32 s8, 3
2308; GCN-NEXT:    s_cselect_b32 s0, s5, s0
2309; GCN-NEXT:    s_cmp_eq_u32 s8, 4
2310; GCN-NEXT:    s_cselect_b32 s0, s6, s0
2311; GCN-NEXT:    s_cmp_eq_u32 s8, 5
2312; GCN-NEXT:    s_cselect_b32 s0, s7, s0
2313; GCN-NEXT:    v_mov_b32_e32 v0, s0
2314; GCN-NEXT:    ; return to shader part epilog
2315;
2316; GFX10PLUS-LABEL: dyn_extract_v6f32_s_s:
2317; GFX10PLUS:       ; %bb.0: ; %entry
2318; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 1
2319; GFX10PLUS-NEXT:    s_cselect_b32 s0, s3, s2
2320; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 2
2321; GFX10PLUS-NEXT:    s_cselect_b32 s0, s4, s0
2322; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 3
2323; GFX10PLUS-NEXT:    s_cselect_b32 s0, s5, s0
2324; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 4
2325; GFX10PLUS-NEXT:    s_cselect_b32 s0, s6, s0
2326; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 5
2327; GFX10PLUS-NEXT:    s_cselect_b32 s0, s7, s0
2328; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
2329; GFX10PLUS-NEXT:    ; return to shader part epilog
2330entry:
2331  %ext = extractelement <6 x float> %vec, i32 %sel
2332  ret float %ext
2333}
2334
; Dynamic extractelement from <7 x float>: the vector is passed inreg (the
; checks read it from s2..s8) and the index is a regular argument (checked in
; v0). Expected code is a v_cmp_eq_u32 / v_cndmask_b32 chain over indices 1..6
; with element 0 as the starting value. The gfx900/fiji checks copy each
; element into a VGPR first; the GFX10PLUS checks use SGPR operands directly.
2335define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) {
2336; GCN-LABEL: dyn_extract_v7f32_s_v:
2337; GCN:       ; %bb.0: ; %entry
2338; GCN-NEXT:    s_mov_b32 s0, s2
2339; GCN-NEXT:    s_mov_b32 s1, s3
2340; GCN-NEXT:    s_mov_b32 s2, s4
2341; GCN-NEXT:    v_mov_b32_e32 v1, s0
2342; GCN-NEXT:    v_mov_b32_e32 v2, s1
2343; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2344; GCN-NEXT:    v_mov_b32_e32 v3, s2
2345; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2346; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2347; GCN-NEXT:    v_mov_b32_e32 v4, s5
2348; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2349; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2350; GCN-NEXT:    v_mov_b32_e32 v5, s6
2351; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2352; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2353; GCN-NEXT:    v_mov_b32_e32 v6, s7
2354; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2355; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2356; GCN-NEXT:    v_mov_b32_e32 v7, s8
2357; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
2358; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2359; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v7, vcc
2360; GCN-NEXT:    ; return to shader part epilog
2361;
2362; GFX10PLUS-LABEL: dyn_extract_v7f32_s_v:
2363; GFX10PLUS:       ; %bb.0: ; %entry
2364; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2365; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2366; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, s1
2367; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2368; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2369; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2370; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2371; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2372; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2373; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2374; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2375; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
2376; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2377; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
2378; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2379; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2380; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2381; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
2382; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2383; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, s6, vcc_lo
2384; GFX10PLUS-NEXT:    ; return to shader part epilog
2385entry:
2386  %ext = extractelement <7 x float> %vec, i32 %sel
2387  ret float %ext
2388}
2389
; Dynamic extractelement from <7 x float> in a default-calling-convention
; function: the checks read the vector from v0..v6 and the index from v7.
; Expected code is a v_cmp_eq_u32 / v_cndmask_b32 chain over indices 1..6 that
; accumulates into v0, followed by a normal s_setpc_b64 return.
2390define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) {
2391; GCN-LABEL: dyn_extract_v7f32_v_v:
2392; GCN:       ; %bb.0: ; %entry
2393; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2394; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v7
2395; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2396; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v7
2397; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2398; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
2399; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2400; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v7
2401; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2402; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v7
2403; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2404; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v7
2405; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2406; GCN-NEXT:    s_setpc_b64 s[30:31]
2407;
2408; GFX10PLUS-LABEL: dyn_extract_v7f32_v_v:
2409; GFX10PLUS:       ; %bb.0: ; %entry
2410; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2411; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
2412; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
2413; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2414; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v7
2415; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2416; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v7
2417; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2418; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v7
2419; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2420; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v7
2421; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2422; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v7
2423; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2424; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
2425entry:
2426  %ext = extractelement <7 x float> %vec, i32 %sel
2427  ret float %ext
2428}
2429
; Dynamic extractelement from <7 x float> with the vector as a regular
; argument (checked in v0..v6) and the index passed inreg (checked in s2).
; Expected code compares the scalar index against 1..6 with the VOP3 form of
; v_cmp_eq_u32 and selects with v_cndmask_b32, accumulating into v0.
2430define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) {
2431; GCN-LABEL: dyn_extract_v7f32_v_s:
2432; GCN:       ; %bb.0: ; %entry
2433; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
2434; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2435; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
2436; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2437; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
2438; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2439; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
2440; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2441; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
2442; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2443; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
2444; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2445; GCN-NEXT:    ; return to shader part epilog
2446;
2447; GFX10PLUS-LABEL: dyn_extract_v7f32_v_s:
2448; GFX10PLUS:       ; %bb.0: ; %entry
2449; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
2450; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2451; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
2452; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2453; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
2454; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2455; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
2456; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2457; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
2458; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2459; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
2460; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2461; GFX10PLUS-NEXT:    ; return to shader part epilog
2462entry:
2463  %ext = extractelement <7 x float> %vec, i32 %sel
2464  ret float %ext
2465}
2466
; Dynamic extractelement from <7 x float> with both the vector and the index
; passed inreg (checked in s2..s8 and s9). Expected code stays on the scalar
; unit: an s_cmp_eq_u32 / s_cselect_b32 chain over indices 1..6, with the
; selected value copied to v0 for the float return.
2467define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) {
2468; GCN-LABEL: dyn_extract_v7f32_s_s:
2469; GCN:       ; %bb.0: ; %entry
2470; GCN-NEXT:    s_cmp_eq_u32 s9, 1
2471; GCN-NEXT:    s_cselect_b32 s0, s3, s2
2472; GCN-NEXT:    s_cmp_eq_u32 s9, 2
2473; GCN-NEXT:    s_cselect_b32 s0, s4, s0
2474; GCN-NEXT:    s_cmp_eq_u32 s9, 3
2475; GCN-NEXT:    s_cselect_b32 s0, s5, s0
2476; GCN-NEXT:    s_cmp_eq_u32 s9, 4
2477; GCN-NEXT:    s_cselect_b32 s0, s6, s0
2478; GCN-NEXT:    s_cmp_eq_u32 s9, 5
2479; GCN-NEXT:    s_cselect_b32 s0, s7, s0
2480; GCN-NEXT:    s_cmp_eq_u32 s9, 6
2481; GCN-NEXT:    s_cselect_b32 s0, s8, s0
2482; GCN-NEXT:    v_mov_b32_e32 v0, s0
2483; GCN-NEXT:    ; return to shader part epilog
2484;
2485; GFX10PLUS-LABEL: dyn_extract_v7f32_s_s:
2486; GFX10PLUS:       ; %bb.0: ; %entry
2487; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 1
2488; GFX10PLUS-NEXT:    s_cselect_b32 s0, s3, s2
2489; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 2
2490; GFX10PLUS-NEXT:    s_cselect_b32 s0, s4, s0
2491; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 3
2492; GFX10PLUS-NEXT:    s_cselect_b32 s0, s5, s0
2493; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 4
2494; GFX10PLUS-NEXT:    s_cselect_b32 s0, s6, s0
2495; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 5
2496; GFX10PLUS-NEXT:    s_cselect_b32 s0, s7, s0
2497; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 6
2498; GFX10PLUS-NEXT:    s_cselect_b32 s0, s8, s0
2499; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
2500; GFX10PLUS-NEXT:    ; return to shader part epilog
2501entry:
2502  %ext = extractelement <7 x float> %vec, i32 %sel
2503  ret float %ext
2504}
2505
; Dynamic extractelement from <6 x double>: the vector is passed inreg (the
; checks read it from s2..s13) and the index is a regular argument (checked in
; v0). Each 64-bit element is selected as two 32-bit halves, so every
; v_cmp_eq_u32 (indices 1..5) is followed by a pair of v_cndmask_b32. The
; double result is returned in s0/s1 via v_readfirstlane_b32. gfx1010 and
; gfx1100 have separate check prefixes here because the gfx1100 output pairs
; the initial moves into a single v_dual_mov_b32.
2506define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) {
2507; GCN-LABEL: dyn_extract_v6f64_s_v:
2508; GCN:       ; %bb.0: ; %entry
2509; GCN-NEXT:    s_mov_b32 s0, s2
2510; GCN-NEXT:    s_mov_b32 s1, s3
2511; GCN-NEXT:    s_mov_b32 s2, s4
2512; GCN-NEXT:    s_mov_b32 s3, s5
2513; GCN-NEXT:    s_mov_b32 s4, s6
2514; GCN-NEXT:    s_mov_b32 s5, s7
2515; GCN-NEXT:    v_mov_b32_e32 v1, s0
2516; GCN-NEXT:    v_mov_b32_e32 v2, s1
2517; GCN-NEXT:    v_mov_b32_e32 v3, s2
2518; GCN-NEXT:    v_mov_b32_e32 v4, s3
2519; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2520; GCN-NEXT:    s_mov_b32 s6, s8
2521; GCN-NEXT:    s_mov_b32 s7, s9
2522; GCN-NEXT:    v_mov_b32_e32 v5, s4
2523; GCN-NEXT:    v_mov_b32_e32 v6, s5
2524; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2525; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2526; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2527; GCN-NEXT:    v_mov_b32_e32 v7, s6
2528; GCN-NEXT:    v_mov_b32_e32 v8, s7
2529; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2530; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2531; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2532; GCN-NEXT:    v_mov_b32_e32 v9, s10
2533; GCN-NEXT:    v_mov_b32_e32 v10, s11
2534; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2535; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2536; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2537; GCN-NEXT:    v_mov_b32_e32 v11, s12
2538; GCN-NEXT:    v_mov_b32_e32 v12, s13
2539; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2540; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
2541; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2542; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v11, vcc
2543; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v12, vcc
2544; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2545; GCN-NEXT:    v_readfirstlane_b32 s1, v1
2546; GCN-NEXT:    ; return to shader part epilog
2547;
2548; GFX10-LABEL: dyn_extract_v6f64_s_v:
2549; GFX10:       ; %bb.0: ; %entry
2550; GFX10-NEXT:    s_mov_b32 s0, s2
2551; GFX10-NEXT:    s_mov_b32 s2, s4
2552; GFX10-NEXT:    s_mov_b32 s15, s5
2553; GFX10-NEXT:    v_mov_b32_e32 v1, s2
2554; GFX10-NEXT:    v_mov_b32_e32 v2, s15
2555; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2556; GFX10-NEXT:    s_mov_b32 s1, s3
2557; GFX10-NEXT:    s_mov_b32 s4, s6
2558; GFX10-NEXT:    s_mov_b32 s5, s7
2559; GFX10-NEXT:    s_mov_b32 s6, s8
2560; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2561; GFX10-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2562; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2563; GFX10-NEXT:    s_mov_b32 s7, s9
2564; GFX10-NEXT:    s_mov_b32 s8, s10
2565; GFX10-NEXT:    s_mov_b32 s9, s11
2566; GFX10-NEXT:    s_mov_b32 s10, s12
2567; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2568; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2569; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2570; GFX10-NEXT:    s_mov_b32 s11, s13
2571; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2572; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2573; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2574; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2575; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2576; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2577; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s10, vcc_lo
2578; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s11, vcc_lo
2579; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2580; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
2581; GFX10-NEXT:    ; return to shader part epilog
2582;
2583; GFX11-LABEL: dyn_extract_v6f64_s_v:
2584; GFX11:       ; %bb.0: ; %entry
2585; GFX11-NEXT:    s_mov_b32 s0, s2
2586; GFX11-NEXT:    s_mov_b32 s2, s4
2587; GFX11-NEXT:    s_mov_b32 s15, s5
2588; GFX11-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s15
2589; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2590; GFX11-NEXT:    s_mov_b32 s1, s3
2591; GFX11-NEXT:    s_mov_b32 s4, s6
2592; GFX11-NEXT:    s_mov_b32 s5, s7
2593; GFX11-NEXT:    s_mov_b32 s6, s8
2594; GFX11-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2595; GFX11-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2596; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2597; GFX11-NEXT:    s_mov_b32 s7, s9
2598; GFX11-NEXT:    s_mov_b32 s8, s10
2599; GFX11-NEXT:    s_mov_b32 s9, s11
2600; GFX11-NEXT:    s_mov_b32 s10, s12
2601; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2602; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2603; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2604; GFX11-NEXT:    s_mov_b32 s11, s13
2605; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2606; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2607; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2608; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2609; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2610; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2611; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s10, vcc_lo
2612; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s11, vcc_lo
2613; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2614; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
2615; GFX11-NEXT:    ; return to shader part epilog
2616entry:
2617  %ext = extractelement <6 x double> %vec, i32 %sel
2618  ret double %ext
2619}
2620
; Dynamic extractelement from <6 x double> in a default-calling-convention
; function: the checks read the vector from v0..v11 and the index from v12.
; Each compare against 1..5 drives two selects, one per 32-bit half, that
; accumulate into v0/v1. The gfx1100 checks expect each pair of selects fused
; into a single v_dual_cndmask_b32, which is why gfx1010 and gfx1100 use
; separate prefixes here.
2621define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) {
2622; GCN-LABEL: dyn_extract_v6f64_v_v:
2623; GCN:       ; %bb.0: ; %entry
2624; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2625; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v12
2626; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2627; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2628; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v12
2629; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2630; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2631; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v12
2632; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2633; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2634; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v12
2635; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
2636; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2637; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v12
2638; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
2639; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2640; GCN-NEXT:    s_setpc_b64 s[30:31]
2641;
2642; GFX10-LABEL: dyn_extract_v6f64_v_v:
2643; GFX10:       ; %bb.0: ; %entry
2644; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2645; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2646; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
2647; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2648; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2649; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v12
2650; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2651; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2652; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v12
2653; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2654; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2655; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v12
2656; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2657; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2658; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v12
2659; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2660; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2661; GFX10-NEXT:    s_setpc_b64 s[30:31]
2662;
2663; GFX11-LABEL: dyn_extract_v6f64_v_v:
2664; GFX11:       ; %bb.0: ; %entry
2665; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2666; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2667; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
2668; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
2669; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v12
2670; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
2671; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v12
2672; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
2673; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v12
2674; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
2675; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v12
2676; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
2677; GFX11-NEXT:    s_setpc_b64 s[30:31]
2678entry:
2679  %ext = extractelement <6 x double> %vec, i32 %sel
2680  ret double %ext
2681}
2682
; Dynamic extractelement from <6 x double> with the vector as a regular
; argument (checked in v0..v11) and the index passed inreg (checked in s2).
; Unlike the compare/select variants, the checks here expect indirect VGPR
; indexing: the index is shifted left by 1 (s_lshl_b32) and used for two
; relative moves based at v0 and v1, one per 32-bit half. The gfx900 checks use
; s_set_gpr_idx_on / s_set_gpr_idx_off around plain v_mov_b32; the fiji and
; GFX10PLUS checks write m0 and use v_movrels_b32. The result is returned in
; s0/s1 via v_readfirstlane_b32.
2683define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) {
2684; GPRIDX-LABEL: dyn_extract_v6f64_v_s:
2685; GPRIDX:       ; %bb.0: ; %entry
2686; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
2687; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
2688; GPRIDX-NEXT:    v_mov_b32_e32 v12, v0
2689; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
2690; GPRIDX-NEXT:    s_set_gpr_idx_off
2691; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v12
2692; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
2693; GPRIDX-NEXT:    ; return to shader part epilog
2694;
2695; MOVREL-LABEL: dyn_extract_v6f64_v_s:
2696; MOVREL:       ; %bb.0: ; %entry
2697; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
2698; MOVREL-NEXT:    v_movrels_b32_e32 v12, v0
2699; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
2700; MOVREL-NEXT:    v_readfirstlane_b32 s0, v12
2701; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
2702; MOVREL-NEXT:    ; return to shader part epilog
2703;
2704; GFX10PLUS-LABEL: dyn_extract_v6f64_v_s:
2705; GFX10PLUS:       ; %bb.0: ; %entry
2706; GFX10PLUS-NEXT:    s_lshl_b32 m0, s2, 1
2707; GFX10PLUS-NEXT:    v_movrels_b32_e32 v12, v0
2708; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v1
2709; GFX10PLUS-NEXT:    v_readfirstlane_b32 s0, v12
2710; GFX10PLUS-NEXT:    v_readfirstlane_b32 s1, v0
2711; GFX10PLUS-NEXT:    ; return to shader part epilog
2712entry:
2713  %ext = extractelement <6 x double> %vec, i32 %sel
2714  ret double %ext
2715}
2716
; Dynamic extractelement from <6 x double> with both the vector and the index
; passed inreg (checked in s2..s13 and s14). The checks expect the vector to be
; copied down into s0..s11, the index moved into m0, and a single
; s_movrels_b64 based at s[0:1] to fetch the selected 64-bit element into
; s[0:1]. The expected sequence is the same for every target in the RUN lines.
2717define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) {
2718; GCN-LABEL: dyn_extract_v6f64_s_s:
2719; GCN:       ; %bb.0: ; %entry
2720; GCN-NEXT:    s_mov_b32 s0, s2
2721; GCN-NEXT:    s_mov_b32 s1, s3
2722; GCN-NEXT:    s_mov_b32 m0, s14
2723; GCN-NEXT:    s_mov_b32 s2, s4
2724; GCN-NEXT:    s_mov_b32 s3, s5
2725; GCN-NEXT:    s_mov_b32 s4, s6
2726; GCN-NEXT:    s_mov_b32 s5, s7
2727; GCN-NEXT:    s_mov_b32 s6, s8
2728; GCN-NEXT:    s_mov_b32 s7, s9
2729; GCN-NEXT:    s_mov_b32 s8, s10
2730; GCN-NEXT:    s_mov_b32 s9, s11
2731; GCN-NEXT:    s_mov_b32 s10, s12
2732; GCN-NEXT:    s_mov_b32 s11, s13
2733; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2734; GCN-NEXT:    ; return to shader part epilog
2735;
2736; GFX10PLUS-LABEL: dyn_extract_v6f64_s_s:
2737; GFX10PLUS:       ; %bb.0: ; %entry
2738; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2739; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2740; GFX10PLUS-NEXT:    s_mov_b32 m0, s14
2741; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2742; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2743; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2744; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2745; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2746; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2747; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
2748; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
2749; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
2750; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
2751; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2752; GFX10PLUS-NEXT:    ; return to shader part epilog
2753entry:
2754  %ext = extractelement <6 x double> %vec, i32 %sel
2755  ret double %ext
2756}
2757
; Dynamic extractelement from <7 x double>: the vector is passed inreg (the
; checks read it from s2..s15) and the index is a regular argument (checked in
; v0). Each 64-bit element is selected as two 32-bit halves, so every
; v_cmp_eq_u32 (indices 1..6) is followed by a pair of v_cndmask_b32. The
; double result is returned in s0/s1 via v_readfirstlane_b32. gfx1010 and
; gfx1100 have separate check prefixes here because the gfx1100 output pairs
; the initial moves into a single v_dual_mov_b32.
2758define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) {
2759; GCN-LABEL: dyn_extract_v7f64_s_v:
2760; GCN:       ; %bb.0: ; %entry
2761; GCN-NEXT:    s_mov_b32 s0, s2
2762; GCN-NEXT:    s_mov_b32 s1, s3
2763; GCN-NEXT:    s_mov_b32 s2, s4
2764; GCN-NEXT:    s_mov_b32 s3, s5
2765; GCN-NEXT:    s_mov_b32 s4, s6
2766; GCN-NEXT:    s_mov_b32 s5, s7
2767; GCN-NEXT:    v_mov_b32_e32 v1, s0
2768; GCN-NEXT:    v_mov_b32_e32 v2, s1
2769; GCN-NEXT:    v_mov_b32_e32 v3, s2
2770; GCN-NEXT:    v_mov_b32_e32 v4, s3
2771; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2772; GCN-NEXT:    s_mov_b32 s6, s8
2773; GCN-NEXT:    s_mov_b32 s7, s9
2774; GCN-NEXT:    v_mov_b32_e32 v5, s4
2775; GCN-NEXT:    v_mov_b32_e32 v6, s5
2776; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2777; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2778; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2779; GCN-NEXT:    s_mov_b32 s8, s10
2780; GCN-NEXT:    s_mov_b32 s9, s11
2781; GCN-NEXT:    v_mov_b32_e32 v7, s6
2782; GCN-NEXT:    v_mov_b32_e32 v8, s7
2783; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2784; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2785; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2786; GCN-NEXT:    v_mov_b32_e32 v9, s8
2787; GCN-NEXT:    v_mov_b32_e32 v10, s9
2788; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2789; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2790; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2791; GCN-NEXT:    v_mov_b32_e32 v11, s12
2792; GCN-NEXT:    v_mov_b32_e32 v12, s13
2793; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2794; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
2795; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2796; GCN-NEXT:    v_mov_b32_e32 v13, s14
2797; GCN-NEXT:    v_mov_b32_e32 v14, s15
2798; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2799; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
2800; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2801; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v13, vcc
2802; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v14, vcc
2803; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2804; GCN-NEXT:    v_readfirstlane_b32 s1, v1
2805; GCN-NEXT:    ; return to shader part epilog
2806;
2807; GFX10-LABEL: dyn_extract_v7f64_s_v:
2808; GFX10:       ; %bb.0: ; %entry
2809; GFX10-NEXT:    s_mov_b32 s0, s2
2810; GFX10-NEXT:    s_mov_b32 s2, s4
2811; GFX10-NEXT:    s_mov_b32 s19, s5
2812; GFX10-NEXT:    v_mov_b32_e32 v1, s2
2813; GFX10-NEXT:    v_mov_b32_e32 v2, s19
2814; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2815; GFX10-NEXT:    s_mov_b32 s1, s3
2816; GFX10-NEXT:    s_mov_b32 s4, s6
2817; GFX10-NEXT:    s_mov_b32 s5, s7
2818; GFX10-NEXT:    s_mov_b32 s6, s8
2819; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2820; GFX10-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2821; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2822; GFX10-NEXT:    s_mov_b32 s7, s9
2823; GFX10-NEXT:    s_mov_b32 s8, s10
2824; GFX10-NEXT:    s_mov_b32 s9, s11
2825; GFX10-NEXT:    s_mov_b32 s10, s12
2826; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2827; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2828; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2829; GFX10-NEXT:    s_mov_b32 s11, s13
2830; GFX10-NEXT:    s_mov_b32 s12, s14
2831; GFX10-NEXT:    s_mov_b32 s13, s15
2832; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2833; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2834; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2835; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2836; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2837; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2838; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2839; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2840; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2841; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s12, vcc_lo
2842; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s13, vcc_lo
2843; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2844; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
2845; GFX10-NEXT:    ; return to shader part epilog
2846;
2847; GFX11-LABEL: dyn_extract_v7f64_s_v:
2848; GFX11:       ; %bb.0: ; %entry
2849; GFX11-NEXT:    s_mov_b32 s0, s2
2850; GFX11-NEXT:    s_mov_b32 s2, s4
2851; GFX11-NEXT:    s_mov_b32 s19, s5
2852; GFX11-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s19
2853; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2854; GFX11-NEXT:    s_mov_b32 s1, s3
2855; GFX11-NEXT:    s_mov_b32 s4, s6
2856; GFX11-NEXT:    s_mov_b32 s5, s7
2857; GFX11-NEXT:    s_mov_b32 s6, s8
2858; GFX11-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
2859; GFX11-NEXT:    v_cndmask_b32_e32 v2, s1, v2, vcc_lo
2860; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2861; GFX11-NEXT:    s_mov_b32 s7, s9
2862; GFX11-NEXT:    s_mov_b32 s8, s10
2863; GFX11-NEXT:    s_mov_b32 s9, s11
2864; GFX11-NEXT:    s_mov_b32 s10, s12
2865; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2866; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
2867; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2868; GFX11-NEXT:    s_mov_b32 s11, s13
2869; GFX11-NEXT:    s_mov_b32 s12, s14
2870; GFX11-NEXT:    s_mov_b32 s13, s15
2871; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2872; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2873; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2874; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2875; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2876; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2877; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2878; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2879; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2880; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s12, vcc_lo
2881; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s13, vcc_lo
2882; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2883; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
2884; GFX11-NEXT:    ; return to shader part epilog
2885entry:
2886  %ext = extractelement <7 x double> %vec, i32 %sel
2887  ret double %ext
2888}
2889
; Dynamic extractelement from a <7 x double> where neither the vector nor the
; index is marked inreg. The checks below expect the vector in v[0:13] and the
; index in v14, with the element chosen by a v_cmp_eq_u32 / cndmask chain over
; indices 1 through 6 (the GFX11 run pairs both halves into v_dual_cndmask_b32).
2890define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
2891; GCN-LABEL: dyn_extract_v7f64_v_v:
2892; GCN:       ; %bb.0: ; %entry
2893; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2894; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v14
2895; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2896; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2897; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v14
2898; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2899; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2900; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v14
2901; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2902; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2903; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v14
2904; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
2905; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2906; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v14
2907; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
2908; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2909; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v14
2910; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
2911; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
2912; GCN-NEXT:    s_setpc_b64 s[30:31]
2913;
2914; GFX10-LABEL: dyn_extract_v7f64_v_v:
2915; GFX10:       ; %bb.0: ; %entry
2916; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2917; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2918; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
2919; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2920; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2921; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v14
2922; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2923; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2924; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v14
2925; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2926; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2927; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v14
2928; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2929; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2930; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v14
2931; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2932; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2933; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v14
2934; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
2935; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
2936; GFX10-NEXT:    s_setpc_b64 s[30:31]
2937;
2938; GFX11-LABEL: dyn_extract_v7f64_v_v:
2939; GFX11:       ; %bb.0: ; %entry
2940; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2941; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2942; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
2943; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
2944; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v14
2945; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
2946; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v14
2947; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
2948; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v14
2949; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
2950; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v14
2951; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
2952; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v14
2953; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
2954; GFX11-NEXT:    s_setpc_b64 s[30:31]
2955entry:
2956  %ext = extractelement <7 x double> %vec, i32 %sel
2957  ret double %ext
2958}
2959
; Dynamic extractelement from a <7 x double> passed without inreg, indexed by
; an inreg i32, in an amdgpu_ps function. The checks expect the index (s2) to
; be shifted left by one with s_lshl_b32 and then used for relative VGPR reads:
; through s_set_gpr_idx_on on the GPRIDX run, and through m0 plus
; v_movrels_b32 on the remaining runs. Both result dwords are then moved to
; s0/s1 with v_readfirstlane_b32.
2960define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) {
2961; GPRIDX-LABEL: dyn_extract_v7f64_v_s:
2962; GPRIDX:       ; %bb.0: ; %entry
2963; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
2964; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
2965; GPRIDX-NEXT:    v_mov_b32_e32 v14, v0
2966; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
2967; GPRIDX-NEXT:    s_set_gpr_idx_off
2968; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v14
2969; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
2970; GPRIDX-NEXT:    ; return to shader part epilog
2971;
2972; MOVREL-LABEL: dyn_extract_v7f64_v_s:
2973; MOVREL:       ; %bb.0: ; %entry
2974; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
2975; MOVREL-NEXT:    v_movrels_b32_e32 v14, v0
2976; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
2977; MOVREL-NEXT:    v_readfirstlane_b32 s0, v14
2978; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
2979; MOVREL-NEXT:    ; return to shader part epilog
2980;
2981; GFX10PLUS-LABEL: dyn_extract_v7f64_v_s:
2982; GFX10PLUS:       ; %bb.0: ; %entry
2983; GFX10PLUS-NEXT:    s_lshl_b32 m0, s2, 1
2984; GFX10PLUS-NEXT:    v_movrels_b32_e32 v14, v0
2985; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v1
2986; GFX10PLUS-NEXT:    v_readfirstlane_b32 s0, v14
2987; GFX10PLUS-NEXT:    v_readfirstlane_b32 s1, v0
2988; GFX10PLUS-NEXT:    ; return to shader part epilog
2989entry:
2990  %ext = extractelement <7 x double> %vec, i32 %sel
2991  ret double %ext
2992}
2993
; Dynamic extractelement from a <7 x double> where both the vector and the
; index are inreg arguments of an amdgpu_ps function. The checks expect the
; vector dwords to be copied from s[2:15] down to s[0:13], the index (s16) to
; be placed in m0, and a single s_movrels_b64 to produce the result in s[0:1].
2994define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) {
2995; GCN-LABEL: dyn_extract_v7f64_s_s:
2996; GCN:       ; %bb.0: ; %entry
2997; GCN-NEXT:    s_mov_b32 s0, s2
2998; GCN-NEXT:    s_mov_b32 s1, s3
2999; GCN-NEXT:    s_mov_b32 m0, s16
3000; GCN-NEXT:    s_mov_b32 s2, s4
3001; GCN-NEXT:    s_mov_b32 s3, s5
3002; GCN-NEXT:    s_mov_b32 s4, s6
3003; GCN-NEXT:    s_mov_b32 s5, s7
3004; GCN-NEXT:    s_mov_b32 s6, s8
3005; GCN-NEXT:    s_mov_b32 s7, s9
3006; GCN-NEXT:    s_mov_b32 s8, s10
3007; GCN-NEXT:    s_mov_b32 s9, s11
3008; GCN-NEXT:    s_mov_b32 s10, s12
3009; GCN-NEXT:    s_mov_b32 s11, s13
3010; GCN-NEXT:    s_mov_b32 s12, s14
3011; GCN-NEXT:    s_mov_b32 s13, s15
3012; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
3013; GCN-NEXT:    ; return to shader part epilog
3014;
3015; GFX10PLUS-LABEL: dyn_extract_v7f64_s_s:
3016; GFX10PLUS:       ; %bb.0: ; %entry
3017; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3018; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3019; GFX10PLUS-NEXT:    s_mov_b32 m0, s16
3020; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3021; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3022; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3023; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3024; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3025; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3026; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
3027; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
3028; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
3029; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
3030; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
3031; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
3032; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[0:1]
3033; GFX10PLUS-NEXT:    ; return to shader part epilog
3034entry:
3035  %ext = extractelement <7 x double> %vec, i32 %sel
3036  ret double %ext
3037}
3038
; Kernel that extracts one element from the constant vector
; <1.0, 2.0, 3.0, 4.0, 5.0> of doubles, using an index taken from the kernel
; arguments, and stores the result through %out. For every run line the checks
; pin the complete .amd_kernel_code_t header and then an s_cmp_eq_u32 /
; s_cselect_b64 chain over indices 1 through 4. The final store is
; flat_store_dwordx2 on the MOVREL run and a global store on the other runs.
3039define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 %sel) {
3040; GPRIDX-LABEL: dyn_extract_v5f64_s_s:
3041; GPRIDX:         .amd_kernel_code_t
3042; GPRIDX-NEXT:     amd_code_version_major = 1
3043; GPRIDX-NEXT:     amd_code_version_minor = 2
3044; GPRIDX-NEXT:     amd_machine_kind = 1
3045; GPRIDX-NEXT:     amd_machine_version_major = 9
3046; GPRIDX-NEXT:     amd_machine_version_minor = 0
3047; GPRIDX-NEXT:     amd_machine_version_stepping = 0
3048; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
3049; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
3050; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
3051; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 1
3052; GPRIDX-NEXT:     priority = 0
3053; GPRIDX-NEXT:     float_mode = 240
3054; GPRIDX-NEXT:     priv = 0
3055; GPRIDX-NEXT:     enable_dx10_clamp = 1
3056; GPRIDX-NEXT:     debug_mode = 0
3057; GPRIDX-NEXT:     enable_ieee_mode = 1
3058; GPRIDX-NEXT:     enable_wgp_mode = 0
3059; GPRIDX-NEXT:     enable_mem_ordered = 0
3060; GPRIDX-NEXT:     enable_fwd_progress = 0
3061; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3062; GPRIDX-NEXT:     user_sgpr_count = 6
3063; GPRIDX-NEXT:     enable_trap_handler = 0
3064; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
3065; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 0
3066; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 0
3067; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
3068; GPRIDX-NEXT:     enable_vgpr_workitem_id = 0
3069; GPRIDX-NEXT:     enable_exception_msb = 0
3070; GPRIDX-NEXT:     granulated_lds_size = 0
3071; GPRIDX-NEXT:     enable_exception = 0
3072; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
3073; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 0
3074; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 0
3075; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3076; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 0
3077; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
3078; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
3079; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3080; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3081; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3082; GPRIDX-NEXT:     enable_wavefront_size32 = 0
3083; GPRIDX-NEXT:     enable_ordered_append_gds = 0
3084; GPRIDX-NEXT:     private_element_size = 1
3085; GPRIDX-NEXT:     is_ptr64 = 1
3086; GPRIDX-NEXT:     is_dynamic_callstack = 0
3087; GPRIDX-NEXT:     is_debug_enabled = 0
3088; GPRIDX-NEXT:     is_xnack_enabled = 1
3089; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
3090; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
3091; GPRIDX-NEXT:     gds_segment_byte_size = 0
3092; GPRIDX-NEXT:     kernarg_segment_byte_size = 12
3093; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
3094; GPRIDX-NEXT:     wavefront_sgpr_count = 9
3095; GPRIDX-NEXT:     workitem_vgpr_count = 3
3096; GPRIDX-NEXT:     reserved_vgpr_first = 0
3097; GPRIDX-NEXT:     reserved_vgpr_count = 0
3098; GPRIDX-NEXT:     reserved_sgpr_first = 0
3099; GPRIDX-NEXT:     reserved_sgpr_count = 0
3100; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3101; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
3102; GPRIDX-NEXT:     kernarg_segment_alignment = 4
3103; GPRIDX-NEXT:     group_segment_alignment = 4
3104; GPRIDX-NEXT:     private_segment_alignment = 4
3105; GPRIDX-NEXT:     wavefront_size = 6
3106; GPRIDX-NEXT:     call_convention = -1
3107; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
3108; GPRIDX-NEXT:    .end_amd_kernel_code_t
3109; GPRIDX-NEXT:  ; %bb.0: ; %entry
3110; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3111; GPRIDX-NEXT:    s_load_dword s8, s[4:5], 0x8
3112; GPRIDX-NEXT:    s_mov_b32 s2, 0
3113; GPRIDX-NEXT:    s_mov_b32 s3, 0x40140000
3114; GPRIDX-NEXT:    s_mov_b32 s5, 0x40080000
3115; GPRIDX-NEXT:    s_mov_b32 s4, s2
3116; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
3117; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 1
3118; GPRIDX-NEXT:    s_cselect_b64 s[6:7], 2.0, 1.0
3119; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 2
3120; GPRIDX-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
3121; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 3
3122; GPRIDX-NEXT:    s_cselect_b64 s[4:5], 4.0, s[4:5]
3123; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 4
3124; GPRIDX-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3125; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
3126; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
3127; GPRIDX-NEXT:    v_mov_b32_e32 v2, 0
3128; GPRIDX-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
3129; GPRIDX-NEXT:    s_endpgm
3130;
3131; MOVREL-LABEL: dyn_extract_v5f64_s_s:
3132; MOVREL:         .amd_kernel_code_t
3133; MOVREL-NEXT:     amd_code_version_major = 1
3134; MOVREL-NEXT:     amd_code_version_minor = 2
3135; MOVREL-NEXT:     amd_machine_kind = 1
3136; MOVREL-NEXT:     amd_machine_version_major = 8
3137; MOVREL-NEXT:     amd_machine_version_minor = 0
3138; MOVREL-NEXT:     amd_machine_version_stepping = 3
3139; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
3140; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
3141; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
3142; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 1
3143; MOVREL-NEXT:     priority = 0
3144; MOVREL-NEXT:     float_mode = 240
3145; MOVREL-NEXT:     priv = 0
3146; MOVREL-NEXT:     enable_dx10_clamp = 1
3147; MOVREL-NEXT:     debug_mode = 0
3148; MOVREL-NEXT:     enable_ieee_mode = 1
3149; MOVREL-NEXT:     enable_wgp_mode = 0
3150; MOVREL-NEXT:     enable_mem_ordered = 0
3151; MOVREL-NEXT:     enable_fwd_progress = 0
3152; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3153; MOVREL-NEXT:     user_sgpr_count = 6
3154; MOVREL-NEXT:     enable_trap_handler = 0
3155; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
3156; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 0
3157; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 0
3158; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
3159; MOVREL-NEXT:     enable_vgpr_workitem_id = 0
3160; MOVREL-NEXT:     enable_exception_msb = 0
3161; MOVREL-NEXT:     granulated_lds_size = 0
3162; MOVREL-NEXT:     enable_exception = 0
3163; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
3164; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 0
3165; MOVREL-NEXT:     enable_sgpr_queue_ptr = 0
3166; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3167; MOVREL-NEXT:     enable_sgpr_dispatch_id = 0
3168; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
3169; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
3170; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3171; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3172; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3173; MOVREL-NEXT:     enable_wavefront_size32 = 0
3174; MOVREL-NEXT:     enable_ordered_append_gds = 0
3175; MOVREL-NEXT:     private_element_size = 1
3176; MOVREL-NEXT:     is_ptr64 = 1
3177; MOVREL-NEXT:     is_dynamic_callstack = 0
3178; MOVREL-NEXT:     is_debug_enabled = 0
3179; MOVREL-NEXT:     is_xnack_enabled = 0
3180; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
3181; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
3182; MOVREL-NEXT:     gds_segment_byte_size = 0
3183; MOVREL-NEXT:     kernarg_segment_byte_size = 12
3184; MOVREL-NEXT:     workgroup_fbarrier_count = 0
3185; MOVREL-NEXT:     wavefront_sgpr_count = 9
3186; MOVREL-NEXT:     workitem_vgpr_count = 4
3187; MOVREL-NEXT:     reserved_vgpr_first = 0
3188; MOVREL-NEXT:     reserved_vgpr_count = 0
3189; MOVREL-NEXT:     reserved_sgpr_first = 0
3190; MOVREL-NEXT:     reserved_sgpr_count = 0
3191; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3192; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
3193; MOVREL-NEXT:     kernarg_segment_alignment = 4
3194; MOVREL-NEXT:     group_segment_alignment = 4
3195; MOVREL-NEXT:     private_segment_alignment = 4
3196; MOVREL-NEXT:     wavefront_size = 6
3197; MOVREL-NEXT:     call_convention = -1
3198; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
3199; MOVREL-NEXT:    .end_amd_kernel_code_t
3200; MOVREL-NEXT:  ; %bb.0: ; %entry
3201; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3202; MOVREL-NEXT:    s_load_dword s8, s[4:5], 0x8
3203; MOVREL-NEXT:    s_mov_b32 s2, 0
3204; MOVREL-NEXT:    s_mov_b32 s3, 0x40140000
3205; MOVREL-NEXT:    s_mov_b32 s5, 0x40080000
3206; MOVREL-NEXT:    s_mov_b32 s4, s2
3207; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
3208; MOVREL-NEXT:    s_cmp_eq_u32 s8, 1
3209; MOVREL-NEXT:    s_cselect_b64 s[6:7], 2.0, 1.0
3210; MOVREL-NEXT:    s_cmp_eq_u32 s8, 2
3211; MOVREL-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
3212; MOVREL-NEXT:    s_cmp_eq_u32 s8, 3
3213; MOVREL-NEXT:    s_cselect_b64 s[4:5], 4.0, s[4:5]
3214; MOVREL-NEXT:    s_cmp_eq_u32 s8, 4
3215; MOVREL-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3216; MOVREL-NEXT:    v_mov_b32_e32 v0, s2
3217; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
3218; MOVREL-NEXT:    v_mov_b32_e32 v1, s3
3219; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
3220; MOVREL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3221; MOVREL-NEXT:    s_endpgm
3222;
3223; GFX10-LABEL: dyn_extract_v5f64_s_s:
3224; GFX10:         .amd_kernel_code_t
3225; GFX10-NEXT:     amd_code_version_major = 1
3226; GFX10-NEXT:     amd_code_version_minor = 2
3227; GFX10-NEXT:     amd_machine_kind = 1
3228; GFX10-NEXT:     amd_machine_version_major = 10
3229; GFX10-NEXT:     amd_machine_version_minor = 1
3230; GFX10-NEXT:     amd_machine_version_stepping = 0
3231; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
3232; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
3233; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
3234; GFX10-NEXT:     granulated_wavefront_sgpr_count = 1
3235; GFX10-NEXT:     priority = 0
3236; GFX10-NEXT:     float_mode = 240
3237; GFX10-NEXT:     priv = 0
3238; GFX10-NEXT:     enable_dx10_clamp = 1
3239; GFX10-NEXT:     debug_mode = 0
3240; GFX10-NEXT:     enable_ieee_mode = 1
3241; GFX10-NEXT:     enable_wgp_mode = 1
3242; GFX10-NEXT:     enable_mem_ordered = 1
3243; GFX10-NEXT:     enable_fwd_progress = 0
3244; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3245; GFX10-NEXT:     user_sgpr_count = 6
3246; GFX10-NEXT:     enable_trap_handler = 0
3247; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
3248; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 0
3249; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 0
3250; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
3251; GFX10-NEXT:     enable_vgpr_workitem_id = 0
3252; GFX10-NEXT:     enable_exception_msb = 0
3253; GFX10-NEXT:     granulated_lds_size = 0
3254; GFX10-NEXT:     enable_exception = 0
3255; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
3256; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 0
3257; GFX10-NEXT:     enable_sgpr_queue_ptr = 0
3258; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3259; GFX10-NEXT:     enable_sgpr_dispatch_id = 0
3260; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
3261; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
3262; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3263; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3264; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3265; GFX10-NEXT:     enable_wavefront_size32 = 1
3266; GFX10-NEXT:     enable_ordered_append_gds = 0
3267; GFX10-NEXT:     private_element_size = 1
3268; GFX10-NEXT:     is_ptr64 = 1
3269; GFX10-NEXT:     is_dynamic_callstack = 0
3270; GFX10-NEXT:     is_debug_enabled = 0
3271; GFX10-NEXT:     is_xnack_enabled = 1
3272; GFX10-NEXT:     workitem_private_segment_byte_size = 0
3273; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
3274; GFX10-NEXT:     gds_segment_byte_size = 0
3275; GFX10-NEXT:     kernarg_segment_byte_size = 12
3276; GFX10-NEXT:     workgroup_fbarrier_count = 0
3277; GFX10-NEXT:     wavefront_sgpr_count = 9
3278; GFX10-NEXT:     workitem_vgpr_count = 3
3279; GFX10-NEXT:     reserved_vgpr_first = 0
3280; GFX10-NEXT:     reserved_vgpr_count = 0
3281; GFX10-NEXT:     reserved_sgpr_first = 0
3282; GFX10-NEXT:     reserved_sgpr_count = 0
3283; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3284; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
3285; GFX10-NEXT:     kernarg_segment_alignment = 4
3286; GFX10-NEXT:     group_segment_alignment = 4
3287; GFX10-NEXT:     private_segment_alignment = 4
3288; GFX10-NEXT:     wavefront_size = 5
3289; GFX10-NEXT:     call_convention = -1
3290; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
3291; GFX10-NEXT:    .end_amd_kernel_code_t
3292; GFX10-NEXT:  ; %bb.0: ; %entry
3293; GFX10-NEXT:    s_clause 0x1
3294; GFX10-NEXT:    s_load_dword s8, s[4:5], 0x8
3295; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3296; GFX10-NEXT:    s_mov_b32 s2, 0
3297; GFX10-NEXT:    s_mov_b32 s3, 0x40140000
3298; GFX10-NEXT:    s_mov_b32 s5, 0x40080000
3299; GFX10-NEXT:    s_mov_b32 s4, s2
3300; GFX10-NEXT:    v_mov_b32_e32 v2, 0
3301; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3302; GFX10-NEXT:    s_cmp_eq_u32 s8, 1
3303; GFX10-NEXT:    s_cselect_b64 s[6:7], 2.0, 1.0
3304; GFX10-NEXT:    s_cmp_eq_u32 s8, 2
3305; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
3306; GFX10-NEXT:    s_cmp_eq_u32 s8, 3
3307; GFX10-NEXT:    s_cselect_b64 s[4:5], 4.0, s[4:5]
3308; GFX10-NEXT:    s_cmp_eq_u32 s8, 4
3309; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3310; GFX10-NEXT:    v_mov_b32_e32 v0, s2
3311; GFX10-NEXT:    v_mov_b32_e32 v1, s3
3312; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
3313; GFX10-NEXT:    s_endpgm
3314;
3315; GFX11-LABEL: dyn_extract_v5f64_s_s:
3316; GFX11:         .amd_kernel_code_t
3317; GFX11-NEXT:     amd_code_version_major = 1
3318; GFX11-NEXT:     amd_code_version_minor = 2
3319; GFX11-NEXT:     amd_machine_kind = 1
3320; GFX11-NEXT:     amd_machine_version_major = 11
3321; GFX11-NEXT:     amd_machine_version_minor = 0
3322; GFX11-NEXT:     amd_machine_version_stepping = 0
3323; GFX11-NEXT:     kernel_code_entry_byte_offset = 256
3324; GFX11-NEXT:     kernel_code_prefetch_byte_size = 0
3325; GFX11-NEXT:     granulated_workitem_vgpr_count = 0
3326; GFX11-NEXT:     granulated_wavefront_sgpr_count = 1
3327; GFX11-NEXT:     priority = 0
3328; GFX11-NEXT:     float_mode = 240
3329; GFX11-NEXT:     priv = 0
3330; GFX11-NEXT:     enable_dx10_clamp = 1
3331; GFX11-NEXT:     debug_mode = 0
3332; GFX11-NEXT:     enable_ieee_mode = 1
3333; GFX11-NEXT:     enable_wgp_mode = 1
3334; GFX11-NEXT:     enable_mem_ordered = 1
3335; GFX11-NEXT:     enable_fwd_progress = 0
3336; GFX11-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3337; GFX11-NEXT:     user_sgpr_count = 15
3338; GFX11-NEXT:     enable_trap_handler = 0
3339; GFX11-NEXT:     enable_sgpr_workgroup_id_x = 1
3340; GFX11-NEXT:     enable_sgpr_workgroup_id_y = 0
3341; GFX11-NEXT:     enable_sgpr_workgroup_id_z = 0
3342; GFX11-NEXT:     enable_sgpr_workgroup_info = 0
3343; GFX11-NEXT:     enable_vgpr_workitem_id = 0
3344; GFX11-NEXT:     enable_exception_msb = 0
3345; GFX11-NEXT:     granulated_lds_size = 0
3346; GFX11-NEXT:     enable_exception = 0
3347; GFX11-NEXT:     enable_sgpr_private_segment_buffer = 0
3348; GFX11-NEXT:     enable_sgpr_dispatch_ptr = 0
3349; GFX11-NEXT:     enable_sgpr_queue_ptr = 0
3350; GFX11-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3351; GFX11-NEXT:     enable_sgpr_dispatch_id = 0
3352; GFX11-NEXT:     enable_sgpr_flat_scratch_init = 0
3353; GFX11-NEXT:     enable_sgpr_private_segment_size = 0
3354; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3355; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3356; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3357; GFX11-NEXT:     enable_wavefront_size32 = 1
3358; GFX11-NEXT:     enable_ordered_append_gds = 0
3359; GFX11-NEXT:     private_element_size = 1
3360; GFX11-NEXT:     is_ptr64 = 1
3361; GFX11-NEXT:     is_dynamic_callstack = 0
3362; GFX11-NEXT:     is_debug_enabled = 0
3363; GFX11-NEXT:     is_xnack_enabled = 0
3364; GFX11-NEXT:     workitem_private_segment_byte_size = 0
3365; GFX11-NEXT:     workgroup_group_segment_byte_size = 0
3366; GFX11-NEXT:     gds_segment_byte_size = 0
3367; GFX11-NEXT:     kernarg_segment_byte_size = 12
3368; GFX11-NEXT:     workgroup_fbarrier_count = 0
3369; GFX11-NEXT:     wavefront_sgpr_count = 9
3370; GFX11-NEXT:     workitem_vgpr_count = 3
3371; GFX11-NEXT:     reserved_vgpr_first = 0
3372; GFX11-NEXT:     reserved_vgpr_count = 0
3373; GFX11-NEXT:     reserved_sgpr_first = 0
3374; GFX11-NEXT:     reserved_sgpr_count = 0
3375; GFX11-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3376; GFX11-NEXT:     debug_private_segment_buffer_sgpr = 0
3377; GFX11-NEXT:     kernarg_segment_alignment = 4
3378; GFX11-NEXT:     group_segment_alignment = 4
3379; GFX11-NEXT:     private_segment_alignment = 4
3380; GFX11-NEXT:     wavefront_size = 5
3381; GFX11-NEXT:     call_convention = -1
3382; GFX11-NEXT:     runtime_loader_kernel_symbol = 0
3383; GFX11-NEXT:    .end_amd_kernel_code_t
3384; GFX11-NEXT:  ; %bb.0: ; %entry
3385; GFX11-NEXT:    s_clause 0x1
3386; GFX11-NEXT:    s_load_b32 s8, s[0:1], 0x8
3387; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
3388; GFX11-NEXT:    s_mov_b32 s2, 0
3389; GFX11-NEXT:    s_mov_b32 s3, 0x40140000
3390; GFX11-NEXT:    s_mov_b32 s5, 0x40080000
3391; GFX11-NEXT:    s_mov_b32 s4, s2
3392; GFX11-NEXT:    v_mov_b32_e32 v2, 0
3393; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3394; GFX11-NEXT:    s_cmp_eq_u32 s8, 1
3395; GFX11-NEXT:    s_cselect_b64 s[6:7], 2.0, 1.0
3396; GFX11-NEXT:    s_cmp_eq_u32 s8, 2
3397; GFX11-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
3398; GFX11-NEXT:    s_cmp_eq_u32 s8, 3
3399; GFX11-NEXT:    s_cselect_b64 s[4:5], 4.0, s[4:5]
3400; GFX11-NEXT:    s_cmp_eq_u32 s8, 4
3401; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3402; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
3403; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
3404; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
3405; GFX11-NEXT:    s_endpgm
3406entry:
3407  %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, i32 %sel
3408  store double %ext, double addrspace(1)* %out
3409  ret void
3410}
3411
; Dynamic extractelement from the constant <15 x float> vector 1.0 .. 15.0
; with a non-inreg index (v0 in the checks). The expected code is a
; v_cmp_eq_u32 / v_cndmask_b32 chain over indices 1 through 14. The GCN checks
; first materialize the non-inline constants with v_mov_b32, whereas the
; GFX10PLUS checks pass them as literal operands of v_cndmask_b32_e64.
3412define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
3413; GCN-LABEL: dyn_extract_v15f32_const_s_v:
3414; GCN:       ; %bb.0: ; %entry
3415; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3416; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3417; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
3418; GCN-NEXT:    v_cndmask_b32_e64 v13, 1.0, 2.0, vcc
3419; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
3420; GCN-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
3421; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
3422; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
3423; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
3424; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
3425; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
3426; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3427; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
3428; GCN-NEXT:    v_mov_b32_e32 v4, 0x40e00000
3429; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
3430; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
3431; GCN-NEXT:    v_mov_b32_e32 v5, 0x41000000
3432; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
3433; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
3434; GCN-NEXT:    v_mov_b32_e32 v6, 0x41100000
3435; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
3436; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v0
3437; GCN-NEXT:    v_mov_b32_e32 v7, 0x41200000
3438; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
3439; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v0
3440; GCN-NEXT:    v_mov_b32_e32 v8, 0x41300000
3441; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
3442; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v0
3443; GCN-NEXT:    v_mov_b32_e32 v9, 0x41400000
3444; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
3445; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v0
3446; GCN-NEXT:    v_mov_b32_e32 v10, 0x41500000
3447; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
3448; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v0
3449; GCN-NEXT:    v_mov_b32_e32 v11, 0x41600000
3450; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
3451; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v0
3452; GCN-NEXT:    v_mov_b32_e32 v12, 0x41700000
3453; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
3454; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v0
3455; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v12, vcc
3456; GCN-NEXT:    s_setpc_b64 s[30:31]
3457;
3458; GFX10PLUS-LABEL: dyn_extract_v15f32_const_s_v:
3459; GFX10PLUS:       ; %bb.0: ; %entry
3460; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3461; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
3462; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3463; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3464; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
3465; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3466; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
3467; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3468; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
3469; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3470; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
3471; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3472; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
3473; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3474; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
3475; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3476; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
3477; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3478; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
3479; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3480; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
3481; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3482; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
3483; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3484; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
3485; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3486; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
3487; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3488; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
3489; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, 0x41700000, vcc_lo
3490; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
3491entry:
3492  %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
3493  ret float %ext
3494}
3495
; Dynamic extractelement from the constant <15 x float> vector 1.0 .. 15.0
; with an inreg index, in an amdgpu_ps function. The checks expect the fifteen
; constants to be materialized into s4 through s18, the index (s2) to be copied
; into m0, and a single s_movrels_b32 based at s4 to select the element, which
; is then moved to v0 as the return value.
3496define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) {
3497; GCN-LABEL: dyn_extract_v15f32_const_s_s:
3498; GCN:       ; %bb.0: ; %entry
3499; GCN-NEXT:    s_mov_b32 s4, 1.0
3500; GCN-NEXT:    s_mov_b32 m0, s2
3501; GCN-NEXT:    s_mov_b32 s18, 0x41700000
3502; GCN-NEXT:    s_mov_b32 s17, 0x41600000
3503; GCN-NEXT:    s_mov_b32 s16, 0x41500000
3504; GCN-NEXT:    s_mov_b32 s15, 0x41400000
3505; GCN-NEXT:    s_mov_b32 s14, 0x41300000
3506; GCN-NEXT:    s_mov_b32 s13, 0x41200000
3507; GCN-NEXT:    s_mov_b32 s12, 0x41100000
3508; GCN-NEXT:    s_mov_b32 s11, 0x41000000
3509; GCN-NEXT:    s_mov_b32 s10, 0x40e00000
3510; GCN-NEXT:    s_mov_b32 s9, 0x40c00000
3511; GCN-NEXT:    s_mov_b32 s8, 0x40a00000
3512; GCN-NEXT:    s_mov_b32 s7, 4.0
3513; GCN-NEXT:    s_mov_b32 s6, 0x40400000
3514; GCN-NEXT:    s_mov_b32 s5, 2.0
3515; GCN-NEXT:    s_movrels_b32 s0, s4
3516; GCN-NEXT:    v_mov_b32_e32 v0, s0
3517; GCN-NEXT:    ; return to shader part epilog
3518;
3519; GFX10PLUS-LABEL: dyn_extract_v15f32_const_s_s:
3520; GFX10PLUS:       ; %bb.0: ; %entry
3521; GFX10PLUS-NEXT:    s_mov_b32 s4, 1.0
3522; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
3523; GFX10PLUS-NEXT:    s_mov_b32 s18, 0x41700000
3524; GFX10PLUS-NEXT:    s_mov_b32 s17, 0x41600000
3525; GFX10PLUS-NEXT:    s_mov_b32 s16, 0x41500000
3526; GFX10PLUS-NEXT:    s_mov_b32 s15, 0x41400000
3527; GFX10PLUS-NEXT:    s_mov_b32 s14, 0x41300000
3528; GFX10PLUS-NEXT:    s_mov_b32 s13, 0x41200000
3529; GFX10PLUS-NEXT:    s_mov_b32 s12, 0x41100000
3530; GFX10PLUS-NEXT:    s_mov_b32 s11, 0x41000000
3531; GFX10PLUS-NEXT:    s_mov_b32 s10, 0x40e00000
3532; GFX10PLUS-NEXT:    s_mov_b32 s9, 0x40c00000
3533; GFX10PLUS-NEXT:    s_mov_b32 s8, 0x40a00000
3534; GFX10PLUS-NEXT:    s_mov_b32 s7, 4.0
3535; GFX10PLUS-NEXT:    s_mov_b32 s6, 0x40400000
3536; GFX10PLUS-NEXT:    s_mov_b32 s5, 2.0
3537; GFX10PLUS-NEXT:    s_movrels_b32 s0, s4
3538; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
3539; GFX10PLUS-NEXT:    ; return to shader part epilog
3540entry:
3541  %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
3542  ret float %ext
3543}
3544
; Dynamic extractelement of a <15 x float> inreg vector with a non-inreg i32
; index (amdgpu_ps). Both check prefixes below expect the incoming s2..s16
; values to be selected through a v_cmp_eq_u32 / v_cndmask_b32 chain that
; compares the index in v0 against 1 through 14 and leaves the result in v0.
3545define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) {
3546; GCN-LABEL: dyn_extract_v15f32_s_v:
3547; GCN:       ; %bb.0: ; %entry
3548; GCN-NEXT:    s_mov_b32 s0, s2
3549; GCN-NEXT:    s_mov_b32 s1, s3
3550; GCN-NEXT:    s_mov_b32 s2, s4
3551; GCN-NEXT:    v_mov_b32_e32 v1, s0
3552; GCN-NEXT:    v_mov_b32_e32 v2, s1
3553; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3554; GCN-NEXT:    s_mov_b32 s3, s5
3555; GCN-NEXT:    v_mov_b32_e32 v3, s2
3556; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3557; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
3558; GCN-NEXT:    s_mov_b32 s4, s6
3559; GCN-NEXT:    v_mov_b32_e32 v4, s3
3560; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
3561; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
3562; GCN-NEXT:    s_mov_b32 s5, s7
3563; GCN-NEXT:    v_mov_b32_e32 v5, s4
3564; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
3565; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
3566; GCN-NEXT:    s_mov_b32 s6, s8
3567; GCN-NEXT:    v_mov_b32_e32 v6, s5
3568; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
3569; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
3570; GCN-NEXT:    s_mov_b32 s7, s9
3571; GCN-NEXT:    v_mov_b32_e32 v7, s6
3572; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
3573; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
3574; GCN-NEXT:    s_mov_b32 s8, s10
3575; GCN-NEXT:    v_mov_b32_e32 v8, s7
3576; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
3577; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
3578; GCN-NEXT:    s_mov_b32 s9, s11
3579; GCN-NEXT:    v_mov_b32_e32 v9, s8
3580; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
3581; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v0
3582; GCN-NEXT:    s_mov_b32 s10, s12
3583; GCN-NEXT:    v_mov_b32_e32 v10, s9
3584; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
3585; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v0
3586; GCN-NEXT:    v_mov_b32_e32 v11, s10
3587; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
3588; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v0
3589; GCN-NEXT:    v_mov_b32_e32 v12, s13
3590; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
3591; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v0
3592; GCN-NEXT:    v_mov_b32_e32 v13, s14
3593; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
3594; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v0
3595; GCN-NEXT:    v_mov_b32_e32 v14, s15
3596; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
3597; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v0
3598; GCN-NEXT:    v_mov_b32_e32 v15, s16
3599; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
3600; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v0
3601; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
3602; GCN-NEXT:    ; return to shader part epilog
3603;
3604; GFX10PLUS-LABEL: dyn_extract_v15f32_s_v:
3605; GFX10PLUS:       ; %bb.0: ; %entry
3606; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3607; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3608; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, s1
3609; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3610; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3611; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3612; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3613; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s0, v1, vcc_lo
3614; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
3615; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3616; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3617; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3618; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
3619; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
3620; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
3621; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
3622; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
3623; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
3624; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
3625; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
3626; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
3627; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
3628; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
3629; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
3630; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
3631; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
3632; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
3633; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
3634; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
3635; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s7, vcc_lo
3636; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
3637; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
3638; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
3639; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s9, vcc_lo
3640; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
3641; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
3642; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
3643; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s11, vcc_lo
3644; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
3645; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
3646; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
3647; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s13, vcc_lo
3648; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
3649; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, s14, vcc_lo
3650; GFX10PLUS-NEXT:    ; return to shader part epilog
3651entry:
3652  %ext = extractelement <15 x float> %vec, i32 %sel
3653  ret float %ext
3654}
3655
; Dynamic extractelement of a <15 x float> argument with an i32 index, using
; the default calling convention. The expected code keeps the running result
; in v0 and, for each index 1 through 14, compares v15 against that index and
; conditionally replaces v0 with the matching register from v1..v14.
3656define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) {
3657; GCN-LABEL: dyn_extract_v15f32_v_v:
3658; GCN:       ; %bb.0: ; %entry
3659; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3660; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3661; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3662; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3663; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3664; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3665; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3666; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3667; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3668; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3669; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3670; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3671; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3672; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3673; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3674; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3675; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3676; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3677; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3678; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3679; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3680; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3681; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3682; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3683; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3684; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3685; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3686; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3687; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3688; GCN-NEXT:    s_setpc_b64 s[30:31]
3689;
3690; GFX10PLUS-LABEL: dyn_extract_v15f32_v_v:
3691; GFX10PLUS:       ; %bb.0: ; %entry
3692; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3693; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
3694; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
3695; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3696; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v15
3697; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3698; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
3699; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3700; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v15
3701; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3702; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v15
3703; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3704; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v15
3705; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3706; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v15
3707; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3708; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v15
3709; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3710; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v15
3711; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3712; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v15
3713; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3714; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v15
3715; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3716; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v15
3717; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3718; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v15
3719; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3720; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v15
3721; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3722; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
3723entry:
3724  %ext = extractelement <15 x float> %vec, i32 %sel
3725  ret float %ext
3726}
3727
; Dynamic extractelement of a <15 x float> argument with an inreg i32 index
; (amdgpu_ps). The checks expect one indexed move rather than a select chain:
; the gfx900 run wraps v_mov_b32 in s_set_gpr_idx_on/off keyed on s2, while the
; fiji and gfx10+ runs copy s2 into m0 and use v_movrels_b32.
3728define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) {
3729; GPRIDX-LABEL: dyn_extract_v15f32_v_s:
3730; GPRIDX:       ; %bb.0: ; %entry
3731; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
3732; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
3733; GPRIDX-NEXT:    s_set_gpr_idx_off
3734; GPRIDX-NEXT:    ; return to shader part epilog
3735;
3736; MOVREL-LABEL: dyn_extract_v15f32_v_s:
3737; MOVREL:       ; %bb.0: ; %entry
3738; MOVREL-NEXT:    s_mov_b32 m0, s2
3739; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
3740; MOVREL-NEXT:    ; return to shader part epilog
3741;
3742; GFX10PLUS-LABEL: dyn_extract_v15f32_v_s:
3743; GFX10PLUS:       ; %bb.0: ; %entry
3744; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
3745; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
3746; GFX10PLUS-NEXT:    ; return to shader part epilog
3747entry:
3748  %ext = extractelement <15 x float> %vec, i32 %sel
3749  ret float %ext
3750}
3751
; Dynamic extractelement with both the <15 x float> vector and the i32 index
; marked inreg (amdgpu_ps). The checks expect the index in s17 to be copied to
; m0, the values in s2..s16 to be moved down into s0..s14, and the element to
; be read with s_movrels_b32 from base s0 before being copied to v0.
3752define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) {
3753; GCN-LABEL: dyn_extract_v15f32_s_s:
3754; GCN:       ; %bb.0: ; %entry
3755; GCN-NEXT:    s_mov_b32 s0, s2
3756; GCN-NEXT:    s_mov_b32 m0, s17
3757; GCN-NEXT:    s_mov_b32 s1, s3
3758; GCN-NEXT:    s_mov_b32 s2, s4
3759; GCN-NEXT:    s_mov_b32 s3, s5
3760; GCN-NEXT:    s_mov_b32 s4, s6
3761; GCN-NEXT:    s_mov_b32 s5, s7
3762; GCN-NEXT:    s_mov_b32 s6, s8
3763; GCN-NEXT:    s_mov_b32 s7, s9
3764; GCN-NEXT:    s_mov_b32 s8, s10
3765; GCN-NEXT:    s_mov_b32 s9, s11
3766; GCN-NEXT:    s_mov_b32 s10, s12
3767; GCN-NEXT:    s_mov_b32 s11, s13
3768; GCN-NEXT:    s_mov_b32 s12, s14
3769; GCN-NEXT:    s_mov_b32 s13, s15
3770; GCN-NEXT:    s_mov_b32 s14, s16
3771; GCN-NEXT:    s_movrels_b32 s0, s0
3772; GCN-NEXT:    v_mov_b32_e32 v0, s0
3773; GCN-NEXT:    ; return to shader part epilog
3774;
3775; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s:
3776; GFX10PLUS:       ; %bb.0: ; %entry
3777; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3778; GFX10PLUS-NEXT:    s_mov_b32 m0, s17
3779; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3780; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3781; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3782; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3783; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3784; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3785; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3786; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
3787; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
3788; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
3789; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
3790; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
3791; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
3792; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
3793; GFX10PLUS-NEXT:    s_movrels_b32 s0, s0
3794; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
3795; GFX10PLUS-NEXT:    ; return to shader part epilog
3796entry:
3797  %ext = extractelement <15 x float> %vec, i32 %sel
3798  ret float %ext
3799}
3800
; Dynamic extractelement with an inreg <15 x float> vector and an inreg i32
; index, where the element index is %sel + 3 (amdgpu_ps). The expected code
; contains no separate add instruction: m0 is still loaded from s17 and the
; s_movrels_b32 source base is s3 instead of s0.
3801define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) {
3802; GCN-LABEL: dyn_extract_v15f32_s_s_offset3:
3803; GCN:       ; %bb.0: ; %entry
3804; GCN-NEXT:    s_mov_b32 s0, s2
3805; GCN-NEXT:    s_mov_b32 s1, s3
3806; GCN-NEXT:    s_mov_b32 s3, s5
3807; GCN-NEXT:    s_mov_b32 m0, s17
3808; GCN-NEXT:    s_mov_b32 s2, s4
3809; GCN-NEXT:    s_mov_b32 s4, s6
3810; GCN-NEXT:    s_mov_b32 s5, s7
3811; GCN-NEXT:    s_mov_b32 s6, s8
3812; GCN-NEXT:    s_mov_b32 s7, s9
3813; GCN-NEXT:    s_mov_b32 s8, s10
3814; GCN-NEXT:    s_mov_b32 s9, s11
3815; GCN-NEXT:    s_mov_b32 s10, s12
3816; GCN-NEXT:    s_mov_b32 s11, s13
3817; GCN-NEXT:    s_mov_b32 s12, s14
3818; GCN-NEXT:    s_mov_b32 s13, s15
3819; GCN-NEXT:    s_mov_b32 s14, s16
3820; GCN-NEXT:    s_movrels_b32 s0, s3
3821; GCN-NEXT:    v_mov_b32_e32 v0, s0
3822; GCN-NEXT:    ; return to shader part epilog
3823;
3824; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s_offset3:
3825; GFX10PLUS:       ; %bb.0: ; %entry
3826; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3827; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3828; GFX10PLUS-NEXT:    s_mov_b32 m0, s17
3829; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3830; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3831; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3832; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3833; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3834; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3835; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
3836; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
3837; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
3838; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
3839; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
3840; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
3841; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
3842; GFX10PLUS-NEXT:    s_movrels_b32 s0, s3
3843; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
3844; GFX10PLUS-NEXT:    ; return to shader part epilog
3845entry:
3846  %add = add i32 %sel, 3
3847  %ext = extractelement <15 x float> %vec, i32 %add
3848  ret float %ext
3849}
3850
; Dynamic extractelement of a <15 x float> argument at index %sel + 3, using
; the default calling convention. The checks expect the add of 3 to be applied
; to v15 first, followed by a v_cmp_eq_u32 / v_cndmask_b32 chain over indices
; 1 through 14 that selects from v1..v14 into v0. The GPRIDX and MOVREL
; expectations differ only in the form of that add instruction.
3851define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) {
3852; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3:
3853; GPRIDX:       ; %bb.0: ; %entry
3854; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3855; GPRIDX-NEXT:    v_add_u32_e32 v15, 3, v15
3856; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3857; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3858; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3859; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3860; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3861; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3862; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3863; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3864; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3865; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3866; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3867; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3868; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3869; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3870; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3871; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3872; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3873; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3874; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3875; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3876; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3877; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3878; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3879; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3880; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3881; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3882; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3883; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3884; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
3885;
3886; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3:
3887; MOVREL:       ; %bb.0: ; %entry
3888; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3889; MOVREL-NEXT:    v_add_u32_e32 v15, vcc, 3, v15
3890; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3891; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3892; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3893; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3894; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3895; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3896; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3897; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3898; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3899; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3900; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3901; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3902; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3903; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3904; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3905; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3906; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3907; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3908; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3909; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3910; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3911; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3912; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3913; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3914; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3915; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3916; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3917; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3918; MOVREL-NEXT:    s_setpc_b64 s[30:31]
3919;
3920; GFX10PLUS-LABEL: dyn_extract_v15f32_v_v_offset3:
3921; GFX10PLUS:       ; %bb.0: ; %entry
3922; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3923; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
3924; GFX10PLUS-NEXT:    v_add_nc_u32_e32 v15, 3, v15
3925; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
3926; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3927; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v15
3928; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3929; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
3930; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3931; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v15
3932; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3933; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v15
3934; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3935; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v15
3936; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3937; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v15
3938; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3939; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v15
3940; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3941; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v15
3942; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3943; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v15
3944; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3945; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v15
3946; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3947; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v15
3948; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3949; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v15
3950; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3951; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v15
3952; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3953; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
3954entry:
3955  %add = add i32 %sel, 3
3956  %ext = extractelement <15 x float> %vec, i32 %add
3957  ret float %ext
3958}
3959
3960define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 %sel) {
3961; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s:
3962; GPRIDX:         .amd_kernel_code_t
3963; GPRIDX-NEXT:     amd_code_version_major = 1
3964; GPRIDX-NEXT:     amd_code_version_minor = 2
3965; GPRIDX-NEXT:     amd_machine_kind = 1
3966; GPRIDX-NEXT:     amd_machine_version_major = 9
3967; GPRIDX-NEXT:     amd_machine_version_minor = 0
3968; GPRIDX-NEXT:     amd_machine_version_stepping = 0
3969; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
3970; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
3971; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
3972; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 0
3973; GPRIDX-NEXT:     priority = 0
3974; GPRIDX-NEXT:     float_mode = 240
3975; GPRIDX-NEXT:     priv = 0
3976; GPRIDX-NEXT:     enable_dx10_clamp = 1
3977; GPRIDX-NEXT:     debug_mode = 0
3978; GPRIDX-NEXT:     enable_ieee_mode = 1
3979; GPRIDX-NEXT:     enable_wgp_mode = 0
3980; GPRIDX-NEXT:     enable_mem_ordered = 0
3981; GPRIDX-NEXT:     enable_fwd_progress = 0
3982; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3983; GPRIDX-NEXT:     user_sgpr_count = 6
3984; GPRIDX-NEXT:     enable_trap_handler = 0
3985; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
3986; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 0
3987; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 0
3988; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
3989; GPRIDX-NEXT:     enable_vgpr_workitem_id = 0
3990; GPRIDX-NEXT:     enable_exception_msb = 0
3991; GPRIDX-NEXT:     granulated_lds_size = 0
3992; GPRIDX-NEXT:     enable_exception = 0
3993; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
3994; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 0
3995; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 0
3996; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3997; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 0
3998; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
3999; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
4000; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4001; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4002; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4003; GPRIDX-NEXT:     enable_wavefront_size32 = 0
4004; GPRIDX-NEXT:     enable_ordered_append_gds = 0
4005; GPRIDX-NEXT:     private_element_size = 1
4006; GPRIDX-NEXT:     is_ptr64 = 1
4007; GPRIDX-NEXT:     is_dynamic_callstack = 0
4008; GPRIDX-NEXT:     is_debug_enabled = 0
4009; GPRIDX-NEXT:     is_xnack_enabled = 1
4010; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
4011; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
4012; GPRIDX-NEXT:     gds_segment_byte_size = 0
4013; GPRIDX-NEXT:     kernarg_segment_byte_size = 12
4014; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
4015; GPRIDX-NEXT:     wavefront_sgpr_count = 6
4016; GPRIDX-NEXT:     workitem_vgpr_count = 2
4017; GPRIDX-NEXT:     reserved_vgpr_first = 0
4018; GPRIDX-NEXT:     reserved_vgpr_count = 0
4019; GPRIDX-NEXT:     reserved_sgpr_first = 0
4020; GPRIDX-NEXT:     reserved_sgpr_count = 0
4021; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4022; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
4023; GPRIDX-NEXT:     kernarg_segment_alignment = 4
4024; GPRIDX-NEXT:     group_segment_alignment = 4
4025; GPRIDX-NEXT:     private_segment_alignment = 4
4026; GPRIDX-NEXT:     wavefront_size = 6
4027; GPRIDX-NEXT:     call_convention = -1
4028; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
4029; GPRIDX-NEXT:    .end_amd_kernel_code_t
4030; GPRIDX-NEXT:  ; %bb.0: ; %entry
4031; GPRIDX-NEXT:    s_load_dword s2, s[4:5], 0x8
4032; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4033; GPRIDX-NEXT:    v_mov_b32_e32 v1, 0
4034; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
4035; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 1
4036; GPRIDX-NEXT:    s_cselect_b32 s3, 2.0, 1.0
4037; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 2
4038; GPRIDX-NEXT:    s_cselect_b32 s3, 0x40400000, s3
4039; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 3
4040; GPRIDX-NEXT:    s_cselect_b32 s2, 4.0, s3
4041; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
4042; GPRIDX-NEXT:    global_store_dword v1, v0, s[0:1]
4043; GPRIDX-NEXT:    s_endpgm
4044;
4045; MOVREL-LABEL: dyn_extract_v4f32_s_s_s:
4046; MOVREL:         .amd_kernel_code_t
4047; MOVREL-NEXT:     amd_code_version_major = 1
4048; MOVREL-NEXT:     amd_code_version_minor = 2
4049; MOVREL-NEXT:     amd_machine_kind = 1
4050; MOVREL-NEXT:     amd_machine_version_major = 8
4051; MOVREL-NEXT:     amd_machine_version_minor = 0
4052; MOVREL-NEXT:     amd_machine_version_stepping = 3
4053; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
4054; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
4055; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
4056; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 0
4057; MOVREL-NEXT:     priority = 0
4058; MOVREL-NEXT:     float_mode = 240
4059; MOVREL-NEXT:     priv = 0
4060; MOVREL-NEXT:     enable_dx10_clamp = 1
4061; MOVREL-NEXT:     debug_mode = 0
4062; MOVREL-NEXT:     enable_ieee_mode = 1
4063; MOVREL-NEXT:     enable_wgp_mode = 0
4064; MOVREL-NEXT:     enable_mem_ordered = 0
4065; MOVREL-NEXT:     enable_fwd_progress = 0
4066; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4067; MOVREL-NEXT:     user_sgpr_count = 6
4068; MOVREL-NEXT:     enable_trap_handler = 0
4069; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
4070; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 0
4071; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 0
4072; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
4073; MOVREL-NEXT:     enable_vgpr_workitem_id = 0
4074; MOVREL-NEXT:     enable_exception_msb = 0
4075; MOVREL-NEXT:     granulated_lds_size = 0
4076; MOVREL-NEXT:     enable_exception = 0
4077; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
4078; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 0
4079; MOVREL-NEXT:     enable_sgpr_queue_ptr = 0
4080; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4081; MOVREL-NEXT:     enable_sgpr_dispatch_id = 0
4082; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
4083; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
4084; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4085; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4086; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4087; MOVREL-NEXT:     enable_wavefront_size32 = 0
4088; MOVREL-NEXT:     enable_ordered_append_gds = 0
4089; MOVREL-NEXT:     private_element_size = 1
4090; MOVREL-NEXT:     is_ptr64 = 1
4091; MOVREL-NEXT:     is_dynamic_callstack = 0
4092; MOVREL-NEXT:     is_debug_enabled = 0
4093; MOVREL-NEXT:     is_xnack_enabled = 0
4094; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
4095; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
4096; MOVREL-NEXT:     gds_segment_byte_size = 0
4097; MOVREL-NEXT:     kernarg_segment_byte_size = 12
4098; MOVREL-NEXT:     workgroup_fbarrier_count = 0
4099; MOVREL-NEXT:     wavefront_sgpr_count = 6
4100; MOVREL-NEXT:     workitem_vgpr_count = 3
4101; MOVREL-NEXT:     reserved_vgpr_first = 0
4102; MOVREL-NEXT:     reserved_vgpr_count = 0
4103; MOVREL-NEXT:     reserved_sgpr_first = 0
4104; MOVREL-NEXT:     reserved_sgpr_count = 0
4105; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4106; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
4107; MOVREL-NEXT:     kernarg_segment_alignment = 4
4108; MOVREL-NEXT:     group_segment_alignment = 4
4109; MOVREL-NEXT:     private_segment_alignment = 4
4110; MOVREL-NEXT:     wavefront_size = 6
4111; MOVREL-NEXT:     call_convention = -1
4112; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
4113; MOVREL-NEXT:    .end_amd_kernel_code_t
4114; MOVREL-NEXT:  ; %bb.0: ; %entry
4115; MOVREL-NEXT:    s_load_dword s2, s[4:5], 0x8
4116; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4117; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
4118; MOVREL-NEXT:    s_cmp_eq_u32 s2, 1
4119; MOVREL-NEXT:    s_cselect_b32 s3, 2.0, 1.0
4120; MOVREL-NEXT:    s_cmp_eq_u32 s2, 2
4121; MOVREL-NEXT:    s_cselect_b32 s3, 0x40400000, s3
4122; MOVREL-NEXT:    s_cmp_eq_u32 s2, 3
4123; MOVREL-NEXT:    s_cselect_b32 s2, 4.0, s3
4124; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
4125; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
4126; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
4127; MOVREL-NEXT:    flat_store_dword v[0:1], v2
4128; MOVREL-NEXT:    s_endpgm
4129;
4130; GFX10-LABEL: dyn_extract_v4f32_s_s_s:
4131; GFX10:         .amd_kernel_code_t
4132; GFX10-NEXT:     amd_code_version_major = 1
4133; GFX10-NEXT:     amd_code_version_minor = 2
4134; GFX10-NEXT:     amd_machine_kind = 1
4135; GFX10-NEXT:     amd_machine_version_major = 10
4136; GFX10-NEXT:     amd_machine_version_minor = 1
4137; GFX10-NEXT:     amd_machine_version_stepping = 0
4138; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
4139; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
4140; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
4141; GFX10-NEXT:     granulated_wavefront_sgpr_count = 0
4142; GFX10-NEXT:     priority = 0
4143; GFX10-NEXT:     float_mode = 240
4144; GFX10-NEXT:     priv = 0
4145; GFX10-NEXT:     enable_dx10_clamp = 1
4146; GFX10-NEXT:     debug_mode = 0
4147; GFX10-NEXT:     enable_ieee_mode = 1
4148; GFX10-NEXT:     enable_wgp_mode = 1
4149; GFX10-NEXT:     enable_mem_ordered = 1
4150; GFX10-NEXT:     enable_fwd_progress = 0
4151; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4152; GFX10-NEXT:     user_sgpr_count = 6
4153; GFX10-NEXT:     enable_trap_handler = 0
4154; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
4155; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 0
4156; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 0
4157; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
4158; GFX10-NEXT:     enable_vgpr_workitem_id = 0
4159; GFX10-NEXT:     enable_exception_msb = 0
4160; GFX10-NEXT:     granulated_lds_size = 0
4161; GFX10-NEXT:     enable_exception = 0
4162; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
4163; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 0
4164; GFX10-NEXT:     enable_sgpr_queue_ptr = 0
4165; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4166; GFX10-NEXT:     enable_sgpr_dispatch_id = 0
4167; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
4168; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
4169; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4170; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4171; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4172; GFX10-NEXT:     enable_wavefront_size32 = 1
4173; GFX10-NEXT:     enable_ordered_append_gds = 0
4174; GFX10-NEXT:     private_element_size = 1
4175; GFX10-NEXT:     is_ptr64 = 1
4176; GFX10-NEXT:     is_dynamic_callstack = 0
4177; GFX10-NEXT:     is_debug_enabled = 0
4178; GFX10-NEXT:     is_xnack_enabled = 1
4179; GFX10-NEXT:     workitem_private_segment_byte_size = 0
4180; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
4181; GFX10-NEXT:     gds_segment_byte_size = 0
4182; GFX10-NEXT:     kernarg_segment_byte_size = 12
4183; GFX10-NEXT:     workgroup_fbarrier_count = 0
4184; GFX10-NEXT:     wavefront_sgpr_count = 6
4185; GFX10-NEXT:     workitem_vgpr_count = 2
4186; GFX10-NEXT:     reserved_vgpr_first = 0
4187; GFX10-NEXT:     reserved_vgpr_count = 0
4188; GFX10-NEXT:     reserved_sgpr_first = 0
4189; GFX10-NEXT:     reserved_sgpr_count = 0
4190; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4191; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
4192; GFX10-NEXT:     kernarg_segment_alignment = 4
4193; GFX10-NEXT:     group_segment_alignment = 4
4194; GFX10-NEXT:     private_segment_alignment = 4
4195; GFX10-NEXT:     wavefront_size = 5
4196; GFX10-NEXT:     call_convention = -1
4197; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
4198; GFX10-NEXT:    .end_amd_kernel_code_t
4199; GFX10-NEXT:  ; %bb.0: ; %entry
4200; GFX10-NEXT:    s_clause 0x1
4201; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8
4202; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4203; GFX10-NEXT:    v_mov_b32_e32 v1, 0
4204; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
4205; GFX10-NEXT:    s_cmp_eq_u32 s2, 1
4206; GFX10-NEXT:    s_cselect_b32 s3, 2.0, 1.0
4207; GFX10-NEXT:    s_cmp_eq_u32 s2, 2
4208; GFX10-NEXT:    s_cselect_b32 s3, 0x40400000, s3
4209; GFX10-NEXT:    s_cmp_eq_u32 s2, 3
4210; GFX10-NEXT:    s_cselect_b32 s2, 4.0, s3
4211; GFX10-NEXT:    v_mov_b32_e32 v0, s2
4212; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
4213; GFX10-NEXT:    s_endpgm
4214;
4215; GFX11-LABEL: dyn_extract_v4f32_s_s_s:
4216; GFX11:         .amd_kernel_code_t
4217; GFX11-NEXT:     amd_code_version_major = 1
4218; GFX11-NEXT:     amd_code_version_minor = 2
4219; GFX11-NEXT:     amd_machine_kind = 1
4220; GFX11-NEXT:     amd_machine_version_major = 11
4221; GFX11-NEXT:     amd_machine_version_minor = 0
4222; GFX11-NEXT:     amd_machine_version_stepping = 0
4223; GFX11-NEXT:     kernel_code_entry_byte_offset = 256
4224; GFX11-NEXT:     kernel_code_prefetch_byte_size = 0
4225; GFX11-NEXT:     granulated_workitem_vgpr_count = 0
4226; GFX11-NEXT:     granulated_wavefront_sgpr_count = 0
4227; GFX11-NEXT:     priority = 0
4228; GFX11-NEXT:     float_mode = 240
4229; GFX11-NEXT:     priv = 0
4230; GFX11-NEXT:     enable_dx10_clamp = 1
4231; GFX11-NEXT:     debug_mode = 0
4232; GFX11-NEXT:     enable_ieee_mode = 1
4233; GFX11-NEXT:     enable_wgp_mode = 1
4234; GFX11-NEXT:     enable_mem_ordered = 1
4235; GFX11-NEXT:     enable_fwd_progress = 0
4236; GFX11-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4237; GFX11-NEXT:     user_sgpr_count = 15
4238; GFX11-NEXT:     enable_trap_handler = 0
4239; GFX11-NEXT:     enable_sgpr_workgroup_id_x = 1
4240; GFX11-NEXT:     enable_sgpr_workgroup_id_y = 0
4241; GFX11-NEXT:     enable_sgpr_workgroup_id_z = 0
4242; GFX11-NEXT:     enable_sgpr_workgroup_info = 0
4243; GFX11-NEXT:     enable_vgpr_workitem_id = 0
4244; GFX11-NEXT:     enable_exception_msb = 0
4245; GFX11-NEXT:     granulated_lds_size = 0
4246; GFX11-NEXT:     enable_exception = 0
4247; GFX11-NEXT:     enable_sgpr_private_segment_buffer = 0
4248; GFX11-NEXT:     enable_sgpr_dispatch_ptr = 0
4249; GFX11-NEXT:     enable_sgpr_queue_ptr = 0
4250; GFX11-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4251; GFX11-NEXT:     enable_sgpr_dispatch_id = 0
4252; GFX11-NEXT:     enable_sgpr_flat_scratch_init = 0
4253; GFX11-NEXT:     enable_sgpr_private_segment_size = 0
4254; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4255; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4256; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4257; GFX11-NEXT:     enable_wavefront_size32 = 1
4258; GFX11-NEXT:     enable_ordered_append_gds = 0
4259; GFX11-NEXT:     private_element_size = 1
4260; GFX11-NEXT:     is_ptr64 = 1
4261; GFX11-NEXT:     is_dynamic_callstack = 0
4262; GFX11-NEXT:     is_debug_enabled = 0
4263; GFX11-NEXT:     is_xnack_enabled = 0
4264; GFX11-NEXT:     workitem_private_segment_byte_size = 0
4265; GFX11-NEXT:     workgroup_group_segment_byte_size = 0
4266; GFX11-NEXT:     gds_segment_byte_size = 0
4267; GFX11-NEXT:     kernarg_segment_byte_size = 12
4268; GFX11-NEXT:     workgroup_fbarrier_count = 0
4269; GFX11-NEXT:     wavefront_sgpr_count = 4
4270; GFX11-NEXT:     workitem_vgpr_count = 2
4271; GFX11-NEXT:     reserved_vgpr_first = 0
4272; GFX11-NEXT:     reserved_vgpr_count = 0
4273; GFX11-NEXT:     reserved_sgpr_first = 0
4274; GFX11-NEXT:     reserved_sgpr_count = 0
4275; GFX11-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4276; GFX11-NEXT:     debug_private_segment_buffer_sgpr = 0
4277; GFX11-NEXT:     kernarg_segment_alignment = 4
4278; GFX11-NEXT:     group_segment_alignment = 4
4279; GFX11-NEXT:     private_segment_alignment = 4
4280; GFX11-NEXT:     wavefront_size = 5
4281; GFX11-NEXT:     call_convention = -1
4282; GFX11-NEXT:     runtime_loader_kernel_symbol = 0
4283; GFX11-NEXT:    .end_amd_kernel_code_t
4284; GFX11-NEXT:  ; %bb.0: ; %entry
4285; GFX11-NEXT:    s_clause 0x1
4286; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8
4287; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
4288; GFX11-NEXT:    v_mov_b32_e32 v1, 0
4289; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4290; GFX11-NEXT:    s_cmp_eq_u32 s2, 1
4291; GFX11-NEXT:    s_cselect_b32 s3, 2.0, 1.0
4292; GFX11-NEXT:    s_cmp_eq_u32 s2, 2
4293; GFX11-NEXT:    s_cselect_b32 s3, 0x40400000, s3
4294; GFX11-NEXT:    s_cmp_eq_u32 s2, 3
4295; GFX11-NEXT:    s_cselect_b32 s2, 4.0, s3
4296; GFX11-NEXT:    v_mov_b32_e32 v0, s2
4297; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
4298; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
4299; GFX11-NEXT:    s_endpgm
4300entry:
4301  %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel
4302  store float %ext, float addrspace(1)* %out
4303  ret void
4304}
4305
; Dynamic extractelement from a constant <4 x double> whose index %sel is a
; kernel argument; the selected element is stored through %out.
; All four check prefixes below expect the index to be resolved with a scalar
; s_cmp_eq_u32 / s_cselect_b64 chain rather than indexed register access.
; The pointer argument uses the opaque `ptr` type; the assertions are unchanged.
define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s:
; GPRIDX:         .amd_kernel_code_t
; GPRIDX-NEXT:     amd_code_version_major = 1
; GPRIDX-NEXT:     amd_code_version_minor = 2
; GPRIDX-NEXT:     amd_machine_kind = 1
; GPRIDX-NEXT:     amd_machine_version_major = 9
; GPRIDX-NEXT:     amd_machine_version_minor = 0
; GPRIDX-NEXT:     amd_machine_version_stepping = 0
; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 0
; GPRIDX-NEXT:     priority = 0
; GPRIDX-NEXT:     float_mode = 240
; GPRIDX-NEXT:     priv = 0
; GPRIDX-NEXT:     enable_dx10_clamp = 1
; GPRIDX-NEXT:     debug_mode = 0
; GPRIDX-NEXT:     enable_ieee_mode = 1
; GPRIDX-NEXT:     enable_wgp_mode = 0
; GPRIDX-NEXT:     enable_mem_ordered = 0
; GPRIDX-NEXT:     enable_fwd_progress = 0
; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
; GPRIDX-NEXT:     user_sgpr_count = 6
; GPRIDX-NEXT:     enable_trap_handler = 0
; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 0
; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 0
; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
; GPRIDX-NEXT:     enable_vgpr_workitem_id = 0
; GPRIDX-NEXT:     enable_exception_msb = 0
; GPRIDX-NEXT:     granulated_lds_size = 0
; GPRIDX-NEXT:     enable_exception = 0
; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 0
; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 0
; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 0
; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
; GPRIDX-NEXT:     enable_wavefront_size32 = 0
; GPRIDX-NEXT:     enable_ordered_append_gds = 0
; GPRIDX-NEXT:     private_element_size = 1
; GPRIDX-NEXT:     is_ptr64 = 1
; GPRIDX-NEXT:     is_dynamic_callstack = 0
; GPRIDX-NEXT:     is_debug_enabled = 0
; GPRIDX-NEXT:     is_xnack_enabled = 1
; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
; GPRIDX-NEXT:     gds_segment_byte_size = 0
; GPRIDX-NEXT:     kernarg_segment_byte_size = 12
; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
; GPRIDX-NEXT:     wavefront_sgpr_count = 7
; GPRIDX-NEXT:     workitem_vgpr_count = 3
; GPRIDX-NEXT:     reserved_vgpr_first = 0
; GPRIDX-NEXT:     reserved_vgpr_count = 0
; GPRIDX-NEXT:     reserved_sgpr_first = 0
; GPRIDX-NEXT:     reserved_sgpr_count = 0
; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
; GPRIDX-NEXT:     kernarg_segment_alignment = 4
; GPRIDX-NEXT:     group_segment_alignment = 4
; GPRIDX-NEXT:     private_segment_alignment = 4
; GPRIDX-NEXT:     wavefront_size = 6
; GPRIDX-NEXT:     call_convention = -1
; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
; GPRIDX-NEXT:    .end_amd_kernel_code_t
; GPRIDX-NEXT:  ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_load_dword s6, s[4:5], 0x8
; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GPRIDX-NEXT:    s_mov_b32 s2, 0
; GPRIDX-NEXT:    s_mov_b32 s3, 0x40080000
; GPRIDX-NEXT:    v_mov_b32_e32 v2, 0
; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 1
; GPRIDX-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 2
; GPRIDX-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 3
; GPRIDX-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
; GPRIDX-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v4f64_s_s_s:
; MOVREL:         .amd_kernel_code_t
; MOVREL-NEXT:     amd_code_version_major = 1
; MOVREL-NEXT:     amd_code_version_minor = 2
; MOVREL-NEXT:     amd_machine_kind = 1
; MOVREL-NEXT:     amd_machine_version_major = 8
; MOVREL-NEXT:     amd_machine_version_minor = 0
; MOVREL-NEXT:     amd_machine_version_stepping = 3
; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 0
; MOVREL-NEXT:     priority = 0
; MOVREL-NEXT:     float_mode = 240
; MOVREL-NEXT:     priv = 0
; MOVREL-NEXT:     enable_dx10_clamp = 1
; MOVREL-NEXT:     debug_mode = 0
; MOVREL-NEXT:     enable_ieee_mode = 1
; MOVREL-NEXT:     enable_wgp_mode = 0
; MOVREL-NEXT:     enable_mem_ordered = 0
; MOVREL-NEXT:     enable_fwd_progress = 0
; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
; MOVREL-NEXT:     user_sgpr_count = 6
; MOVREL-NEXT:     enable_trap_handler = 0
; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 0
; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 0
; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
; MOVREL-NEXT:     enable_vgpr_workitem_id = 0
; MOVREL-NEXT:     enable_exception_msb = 0
; MOVREL-NEXT:     granulated_lds_size = 0
; MOVREL-NEXT:     enable_exception = 0
; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 0
; MOVREL-NEXT:     enable_sgpr_queue_ptr = 0
; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
; MOVREL-NEXT:     enable_sgpr_dispatch_id = 0
; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
; MOVREL-NEXT:     enable_wavefront_size32 = 0
; MOVREL-NEXT:     enable_ordered_append_gds = 0
; MOVREL-NEXT:     private_element_size = 1
; MOVREL-NEXT:     is_ptr64 = 1
; MOVREL-NEXT:     is_dynamic_callstack = 0
; MOVREL-NEXT:     is_debug_enabled = 0
; MOVREL-NEXT:     is_xnack_enabled = 0
; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
; MOVREL-NEXT:     gds_segment_byte_size = 0
; MOVREL-NEXT:     kernarg_segment_byte_size = 12
; MOVREL-NEXT:     workgroup_fbarrier_count = 0
; MOVREL-NEXT:     wavefront_sgpr_count = 7
; MOVREL-NEXT:     workitem_vgpr_count = 4
; MOVREL-NEXT:     reserved_vgpr_first = 0
; MOVREL-NEXT:     reserved_vgpr_count = 0
; MOVREL-NEXT:     reserved_sgpr_first = 0
; MOVREL-NEXT:     reserved_sgpr_count = 0
; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
; MOVREL-NEXT:     kernarg_segment_alignment = 4
; MOVREL-NEXT:     group_segment_alignment = 4
; MOVREL-NEXT:     private_segment_alignment = 4
; MOVREL-NEXT:     wavefront_size = 6
; MOVREL-NEXT:     call_convention = -1
; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
; MOVREL-NEXT:    .end_amd_kernel_code_t
; MOVREL-NEXT:  ; %bb.0: ; %entry
; MOVREL-NEXT:    s_load_dword s6, s[4:5], 0x8
; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; MOVREL-NEXT:    s_mov_b32 s2, 0
; MOVREL-NEXT:    s_mov_b32 s3, 0x40080000
; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
; MOVREL-NEXT:    s_cmp_eq_u32 s6, 1
; MOVREL-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
; MOVREL-NEXT:    s_cmp_eq_u32 s6, 2
; MOVREL-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
; MOVREL-NEXT:    s_cmp_eq_u32 s6, 3
; MOVREL-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
; MOVREL-NEXT:    v_mov_b32_e32 v0, s2
; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
; MOVREL-NEXT:    v_mov_b32_e32 v1, s3
; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
; MOVREL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v4f64_s_s_s:
; GFX10:         .amd_kernel_code_t
; GFX10-NEXT:     amd_code_version_major = 1
; GFX10-NEXT:     amd_code_version_minor = 2
; GFX10-NEXT:     amd_machine_kind = 1
; GFX10-NEXT:     amd_machine_version_major = 10
; GFX10-NEXT:     amd_machine_version_minor = 1
; GFX10-NEXT:     amd_machine_version_stepping = 0
; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
; GFX10-NEXT:     granulated_wavefront_sgpr_count = 0
; GFX10-NEXT:     priority = 0
; GFX10-NEXT:     float_mode = 240
; GFX10-NEXT:     priv = 0
; GFX10-NEXT:     enable_dx10_clamp = 1
; GFX10-NEXT:     debug_mode = 0
; GFX10-NEXT:     enable_ieee_mode = 1
; GFX10-NEXT:     enable_wgp_mode = 1
; GFX10-NEXT:     enable_mem_ordered = 1
; GFX10-NEXT:     enable_fwd_progress = 0
; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
; GFX10-NEXT:     user_sgpr_count = 6
; GFX10-NEXT:     enable_trap_handler = 0
; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 0
; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 0
; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
; GFX10-NEXT:     enable_vgpr_workitem_id = 0
; GFX10-NEXT:     enable_exception_msb = 0
; GFX10-NEXT:     granulated_lds_size = 0
; GFX10-NEXT:     enable_exception = 0
; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 0
; GFX10-NEXT:     enable_sgpr_queue_ptr = 0
; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
; GFX10-NEXT:     enable_sgpr_dispatch_id = 0
; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
; GFX10-NEXT:     enable_wavefront_size32 = 1
; GFX10-NEXT:     enable_ordered_append_gds = 0
; GFX10-NEXT:     private_element_size = 1
; GFX10-NEXT:     is_ptr64 = 1
; GFX10-NEXT:     is_dynamic_callstack = 0
; GFX10-NEXT:     is_debug_enabled = 0
; GFX10-NEXT:     is_xnack_enabled = 1
; GFX10-NEXT:     workitem_private_segment_byte_size = 0
; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
; GFX10-NEXT:     gds_segment_byte_size = 0
; GFX10-NEXT:     kernarg_segment_byte_size = 12
; GFX10-NEXT:     workgroup_fbarrier_count = 0
; GFX10-NEXT:     wavefront_sgpr_count = 7
; GFX10-NEXT:     workitem_vgpr_count = 3
; GFX10-NEXT:     reserved_vgpr_first = 0
; GFX10-NEXT:     reserved_vgpr_count = 0
; GFX10-NEXT:     reserved_sgpr_first = 0
; GFX10-NEXT:     reserved_sgpr_count = 0
; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
; GFX10-NEXT:     kernarg_segment_alignment = 4
; GFX10-NEXT:     group_segment_alignment = 4
; GFX10-NEXT:     private_segment_alignment = 4
; GFX10-NEXT:     wavefront_size = 5
; GFX10-NEXT:     call_convention = -1
; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
; GFX10-NEXT:    .end_amd_kernel_code_t
; GFX10-NEXT:  ; %bb.0: ; %entry
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dword s6, s[4:5], 0x8
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT:    s_mov_b32 s2, 0
; GFX10-NEXT:    s_mov_b32 s3, 0x40080000
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_cmp_eq_u32 s6, 1
; GFX10-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
; GFX10-NEXT:    s_cmp_eq_u32 s6, 2
; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GFX10-NEXT:    s_cmp_eq_u32 s6, 3
; GFX10-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: dyn_extract_v4f64_s_s_s:
; GFX11:         .amd_kernel_code_t
; GFX11-NEXT:     amd_code_version_major = 1
; GFX11-NEXT:     amd_code_version_minor = 2
; GFX11-NEXT:     amd_machine_kind = 1
; GFX11-NEXT:     amd_machine_version_major = 11
; GFX11-NEXT:     amd_machine_version_minor = 0
; GFX11-NEXT:     amd_machine_version_stepping = 0
; GFX11-NEXT:     kernel_code_entry_byte_offset = 256
; GFX11-NEXT:     kernel_code_prefetch_byte_size = 0
; GFX11-NEXT:     granulated_workitem_vgpr_count = 0
; GFX11-NEXT:     granulated_wavefront_sgpr_count = 0
; GFX11-NEXT:     priority = 0
; GFX11-NEXT:     float_mode = 240
; GFX11-NEXT:     priv = 0
; GFX11-NEXT:     enable_dx10_clamp = 1
; GFX11-NEXT:     debug_mode = 0
; GFX11-NEXT:     enable_ieee_mode = 1
; GFX11-NEXT:     enable_wgp_mode = 1
; GFX11-NEXT:     enable_mem_ordered = 1
; GFX11-NEXT:     enable_fwd_progress = 0
; GFX11-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
; GFX11-NEXT:     user_sgpr_count = 15
; GFX11-NEXT:     enable_trap_handler = 0
; GFX11-NEXT:     enable_sgpr_workgroup_id_x = 1
; GFX11-NEXT:     enable_sgpr_workgroup_id_y = 0
; GFX11-NEXT:     enable_sgpr_workgroup_id_z = 0
; GFX11-NEXT:     enable_sgpr_workgroup_info = 0
; GFX11-NEXT:     enable_vgpr_workitem_id = 0
; GFX11-NEXT:     enable_exception_msb = 0
; GFX11-NEXT:     granulated_lds_size = 0
; GFX11-NEXT:     enable_exception = 0
; GFX11-NEXT:     enable_sgpr_private_segment_buffer = 0
; GFX11-NEXT:     enable_sgpr_dispatch_ptr = 0
; GFX11-NEXT:     enable_sgpr_queue_ptr = 0
; GFX11-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
; GFX11-NEXT:     enable_sgpr_dispatch_id = 0
; GFX11-NEXT:     enable_sgpr_flat_scratch_init = 0
; GFX11-NEXT:     enable_sgpr_private_segment_size = 0
; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
; GFX11-NEXT:     enable_wavefront_size32 = 1
; GFX11-NEXT:     enable_ordered_append_gds = 0
; GFX11-NEXT:     private_element_size = 1
; GFX11-NEXT:     is_ptr64 = 1
; GFX11-NEXT:     is_dynamic_callstack = 0
; GFX11-NEXT:     is_debug_enabled = 0
; GFX11-NEXT:     is_xnack_enabled = 0
; GFX11-NEXT:     workitem_private_segment_byte_size = 0
; GFX11-NEXT:     workgroup_group_segment_byte_size = 0
; GFX11-NEXT:     gds_segment_byte_size = 0
; GFX11-NEXT:     kernarg_segment_byte_size = 12
; GFX11-NEXT:     workgroup_fbarrier_count = 0
; GFX11-NEXT:     wavefront_sgpr_count = 7
; GFX11-NEXT:     workitem_vgpr_count = 3
; GFX11-NEXT:     reserved_vgpr_first = 0
; GFX11-NEXT:     reserved_vgpr_count = 0
; GFX11-NEXT:     reserved_sgpr_first = 0
; GFX11-NEXT:     reserved_sgpr_count = 0
; GFX11-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
; GFX11-NEXT:     debug_private_segment_buffer_sgpr = 0
; GFX11-NEXT:     kernarg_segment_alignment = 4
; GFX11-NEXT:     group_segment_alignment = 4
; GFX11-NEXT:     private_segment_alignment = 4
; GFX11-NEXT:     wavefront_size = 5
; GFX11-NEXT:     call_convention = -1
; GFX11-NEXT:     runtime_loader_kernel_symbol = 0
; GFX11-NEXT:    .end_amd_kernel_code_t
; GFX11-NEXT:  ; %bb.0: ; %entry
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s6, s[0:1], 0x8
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s2, 0
; GFX11-NEXT:    s_mov_b32 s3, 0x40080000
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_cmp_eq_u32 s6, 1
; GFX11-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
; GFX11-NEXT:    s_cmp_eq_u32 s6, 2
; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GFX11-NEXT:    s_cmp_eq_u32 s6, 3
; GFX11-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
entry:
  %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel
  store double %ext, ptr addrspace(1) %out
  ret void
}
4662
; Constant-index extract of element 7 from a <64 x i32> loaded from global
; memory. The checks expect a single 16-byte load at byte offset 16 (elements
; 4..7) followed by a copy of its last dword, not a load of the whole vector.
; The pointer argument uses the opaque `ptr` type; the assertions are unchanged.
define i32 @v_extract_v64i32_7(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_7:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v7
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_7:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dwordx4 v[4:7], v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    v_mov_b32_e32 v0, v7
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_7:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v7
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_7:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b128 v[4:7], v[0:1], off offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, v7
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 7
  ret i32 %elt
}
4703
; Constant-index extract of element 32 from a <64 x i32> loaded from global
; memory. The checks expect a single 16-byte load at byte offset 128 whose
; first dword lands directly in the return register v0, so no copy follows.
; The pointer argument uses the opaque `ptr` type; the assertions are unchanged.
define i32 @v_extract_v64i32_32(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_32:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_32:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    s_mov_b64 s[4:5], 0x80
; MOVREL-NEXT:    v_mov_b32_e32 v2, s4
; MOVREL-NEXT:    v_mov_b32_e32 v3, s5
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; MOVREL-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 32
  ret i32 %elt
}
4743
; Constant-index extract of element 33 from a <64 x i32> loaded from global
; memory. The checks expect a single 16-byte load at byte offset 128 (elements
; 32..35) followed by a copy of its second dword into the return register.
; The pointer argument uses the opaque `ptr` type; the assertions are unchanged.
define i32 @v_extract_v64i32_33(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_33:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_33:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    s_mov_b64 s[4:5], 0x80
; MOVREL-NEXT:    v_mov_b32_e32 v2, s4
; MOVREL-NEXT:    v_mov_b32_e32 v3, s5
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; MOVREL-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    v_mov_b32_e32 v0, v1
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:128
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_33:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off offset:128
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 33
  ret i32 %elt
}
4787
; Constant-index extract of element 37 from a <64 x i32> loaded from global
; memory. The checks expect a single 16-byte load at byte offset 144 (elements
; 36..39) followed by a copy of its second dword into the return register.
; The pointer argument uses the opaque `ptr` type; the assertions are unchanged.
define i32 @v_extract_v64i32_37(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_37:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:144
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v5
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_37:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x90, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dwordx4 v[4:7], v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    v_mov_b32_e32 v0, v5
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_37:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:144
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_37:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b128 v[4:7], v[0:1], off offset:144
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 37
  ret i32 %elt
}
4828