1; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
2
; float4_extelt: stores the element of a constant <4 x float> selected by the
; runtime index %sel. The checks expect scalar compares of the index against
; 1, 2 and 3, a chain of three v_cndmask_b32 selects feeding the dword store,
; and no instruction containing buffer_.
3; GCN-LABEL: {{^}}float4_extelt:
4; GCN-NOT: buffer_
5; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
6; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
7; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
8; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
9; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
10; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
11; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
12; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2.0, [[V1]], [[C2]]
13; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4.0, [[V2]], [[C3]]
14; GCN:     store_dword v[{{[0-9:]+}}], [[V3]]
15define amdgpu_kernel void @float4_extelt(float addrspace(1)* %out, i32 %sel) {
16entry:
17  %ext = extractelement <4 x float> <float 0.0, float 1.0, float 2.0, float 4.0>, i32 %sel
18  store float %ext, float addrspace(1)* %out
19  ret void
20}
21
; int4_extelt: stores the element of a constant <4 x i32> selected by the
; runtime index %sel. The checks expect scalar compares of the index against
; 1, 2 and 3 and three chained v_cndmask_b32 selects, the last two of which
; are conditioned on vcc; no instruction containing buffer_ may appear.
22; GCN-LABEL: {{^}}int4_extelt:
23; GCN-NOT: buffer_
24; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 2
25; GCN-DAG: s_cmp_eq_u32 [[IDX]], 1
26; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
27; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
28; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1, [[C1]]
29; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2, [[V1]], vcc
30; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4, [[V2]], vcc
31; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
32define amdgpu_kernel void @int4_extelt(i32 addrspace(1)* %out, i32 %sel) {
33entry:
34  %ext = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 4>, i32 %sel
35  store i32 %ext, i32 addrspace(1)* %out
36  ret void
37}
38
; double4_extelt: stores the element of a constant <4 x double> selected by
; the runtime index %sel. The checks expect equality compares of the index
; with 1, 2 and 3, at least one v_cndmask_b32 conditioned on each compare
; result, a two-dword store, and no instruction containing buffer_.
39; GCN-LABEL: {{^}}double4_extelt:
40; GCN-NOT: buffer_
41; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
42; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
43; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2
44; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
45; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3
46; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
47; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
48; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]]
49; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]]
50; GCN: store_dwordx2 v[{{[0-9:]+}}]
51define amdgpu_kernel void @double4_extelt(double addrspace(1)* %out, i32 %sel) {
52entry:
53  %ext = extractelement <4 x double> <double 0.01, double 1.01, double 2.01, double 4.01>, i32 %sel
54  store double %ext, double addrspace(1)* %out
55  ret void
56}
57
; double5_extelt: stores the element of a constant <5 x double> selected by
; the runtime index %sel. The checks expect equality compares of the index
; with 1 through 4, at least one v_cndmask_b32 conditioned on each compare
; result, a two-dword store, and no instruction containing buffer_.
58; GCN-LABEL: {{^}}double5_extelt:
59; GCN-NOT: buffer_
60; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
61; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
62; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2
63; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
64; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3
65; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
66; GCN-DAG: s_cmp_eq_u32 [[IDX]], 4
67; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
68; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
69; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]]
70; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]]
71; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C4]]
72; GCN: store_dwordx2 v[{{[0-9:]+}}]
73define amdgpu_kernel void @double5_extelt(double addrspace(1)* %out, i32 %sel) {
74entry:
75  %ext = extractelement <5 x double> <double 0.01, double 1.01, double 2.01, double 4.01, double 5.01>, i32 %sel
76  store double %ext, double addrspace(1)* %out
77  ret void
78}
79
; half4_extelt: stores the element of a constant <4 x half> selected by the
; runtime index %sel. The checks expect the four half constants packed into
; two 32-bit scalar moves (0x40003c00 and 0x44004200), the index shifted left
; by 4, a 64-bit scalar right shift of the packed pair by that amount, and the
; low result register copied to a VGPR for the short store. No instruction
; containing buffer_ may appear.
80; GCN-LABEL: {{^}}half4_extelt:
81; GCN-NOT: buffer_
82; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00
83; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200
84; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
85; GCN:     s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]]
86; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
87; GCN:     store_short v[{{[0-9:]+}}], v[[VRL]]
88define amdgpu_kernel void @half4_extelt(half addrspace(1)* %out, i32 %sel) {
89entry:
90  %ext = extractelement <4 x half> <half 1.0, half 2.0, half 3.0, half 4.0>, i32 %sel
91  store half %ext, half addrspace(1)* %out
92  ret void
93}
94
; float2_extelt: stores the element of a constant <2 x float> selected by the
; runtime index %sel. The checks expect one scalar compare of the index with 1
; and a single v_cndmask_b32 between 0 and 1.0 feeding the dword store; no
; instruction containing buffer_ may appear.
95; GCN-LABEL: {{^}}float2_extelt:
96; GCN-NOT: buffer_
97; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
98; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
99; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
100; GCN: store_dword v[{{[0-9:]+}}], [[V1]]
101define amdgpu_kernel void @float2_extelt(float addrspace(1)* %out, i32 %sel) {
102entry:
103  %ext = extractelement <2 x float> <float 0.0, float 1.0>, i32 %sel
104  store float %ext, float addrspace(1)* %out
105  ret void
106}
107
; double2_extelt: stores the element of a constant <2 x double> selected by
; the runtime index %sel. The checks expect one scalar compare of the index
; with 1 and two v_cndmask_b32 selects that both use that compare result,
; followed by a two-dword store; no instruction containing buffer_ may appear.
108; GCN-LABEL: {{^}}double2_extelt:
109; GCN-NOT: buffer_
110; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
111; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
112; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
113; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]]
114; GCN: store_dwordx2 v[{{[0-9:]+}}]
115define amdgpu_kernel void @double2_extelt(double addrspace(1)* %out, i32 %sel) {
116entry:
117  %ext = extractelement <2 x double> <double 0.01, double 1.01>, i32 %sel
118  store double %ext, double addrspace(1)* %out
119  ret void
120}
121
; half8_extelt: stores the element of a constant <8 x half> selected by the
; runtime index %sel. The checks expect scalar compares of the index against
; 1 through 7 and a chain of seven v_cndmask_b32 selects, each consuming the
; previous select's result, ending in a short store; no instruction
; containing buffer_ may appear.
122; GCN-LABEL: {{^}}half8_extelt:
123; GCN-NOT: buffer_
124; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
125; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
126; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
127; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
128; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
129; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
130; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
131; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
132; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
133; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
134; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
135; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
136; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
137; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
138; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
139; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
140; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
141; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
142; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
143; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
144; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
145; GCN:     store_short v[{{[0-9:]+}}], [[V7]]
146define amdgpu_kernel void @half8_extelt(half addrspace(1)* %out, i32 %sel) {
147entry:
148  %ext = extractelement <8 x half> <half 1.0, half 2.0, half 3.0, half 4.0, half 5.0, half 6.0, half 7.0, half 8.0>, i32 %sel
149  store half %ext, half addrspace(1)* %out
150  ret void
151}
152
; short8_extelt: stores the element of a constant <8 x i16> selected by the
; runtime index %sel. The checks expect scalar compares of the index against
; 1 through 7 and a chain of seven v_cndmask_b32 selects, each consuming the
; previous select's result, ending in a short store; no instruction
; containing buffer_ may appear.
153; GCN-LABEL: {{^}}short8_extelt:
154; GCN-NOT: buffer_
155; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
156; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
157; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
158; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
159; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
160; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
161; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
162; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
163; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
164; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
165; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
166; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
167; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
168; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
169; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
170; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
171; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
172; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
173; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
174; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
175; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
176; GCN:     store_short v[{{[0-9:]+}}], [[V7]]
177define amdgpu_kernel void @short8_extelt(i16 addrspace(1)* %out, i32 %sel) {
178entry:
179  %ext = extractelement <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i32 %sel
180  store i16 %ext, i16 addrspace(1)* %out
181  ret void
182}
183
; float8_extelt: stores the element of a constant <8 x float> selected by the
; runtime index %sel. The checks expect scalar compares of the index against
; 1 through 7 and a chain of seven v_cndmask_b32 selects, each consuming the
; previous select's result, ending in a dword store; no instruction
; containing buffer_ may appear.
184; GCN-LABEL: {{^}}float8_extelt:
185; GCN-NOT: buffer_
186; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
187; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
188; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
189; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
190; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
191; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
192; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
193; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
194; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
195; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
196; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
197; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
198; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
199; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
200; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
201; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
202; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
203; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
204; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
205; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
206; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
207; GCN:     store_dword v[{{[0-9:]+}}], [[V7]]
208define amdgpu_kernel void @float8_extelt(float addrspace(1)* %out, i32 %sel) {
209entry:
210  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
211  store float %ext, float addrspace(1)* %out
212  ret void
213}
214
; double8_extelt: stores the element of a constant <8 x double> selected by
; the runtime index %sel. The checks expect m0 to be written from an SGPR and
; two v_movrels_b32 reads, from VGPR number BASE and BASE+1, whose results
; form the register pair of the two-dword store. Neither s_or_b32 nor any
; instruction containing buffer_ may appear.
215; GCN-LABEL: {{^}}double8_extelt:
216; GCN-NOT: buffer_
217; GCN-NOT: s_or_b32
218; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
219; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
220; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
221; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
222; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
223; GCN:     store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
224define amdgpu_kernel void @double8_extelt(double addrspace(1)* %out, i32 %sel) {
225entry:
226  %ext = extractelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, i32 %sel
227  store double %ext, double addrspace(1)* %out
228  ret void
229}
230
; double7_extelt: stores the element of a constant <7 x double> selected by
; the runtime index %sel. The checks expect m0 to be written from an SGPR and
; two v_movrels_b32 reads, from VGPR number BASE and BASE+1, whose results
; form the register pair of the two-dword store. Neither s_or_b32 nor any
; instruction containing buffer_ may appear.
231; GCN-LABEL: {{^}}double7_extelt:
232; GCN-NOT: buffer_
233; GCN-NOT: s_or_b32
234; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
235; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
236; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
237; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
238; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
239; GCN:     store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
240define amdgpu_kernel void @double7_extelt(double addrspace(1)* %out, i32 %sel) {
241entry:
242  %ext = extractelement <7 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, i32 %sel
243  store double %ext, double addrspace(1)* %out
244  ret void
245}
246
; float16_extelt: stores the element of a constant <16 x float> selected by
; the runtime index %sel. The checks expect a write to m0, one v_mov_b32 per
; vector constant (1.0 through 16.0, non-inline values given as hex bit
; patterns), and a v_movrels_b32 whose source is the register holding 1.0 and
; whose result feeds the dword store. No instruction containing buffer_ may
; appear.
247; GCN-LABEL: {{^}}float16_extelt:
248; GCN-NOT: buffer_
249; GCN-DAG: s_mov_b32 m0,
250; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
251; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
252; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
253; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
254; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
255; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
256; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
257; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
258; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
259; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
260; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
261; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
262; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
263; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
264; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
265; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
266; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
267; GCN:     store_dword v[{{[0-9:]+}}], [[RES]]
268define amdgpu_kernel void @float16_extelt(float addrspace(1)* %out, i32 %sel) {
269entry:
270  %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
271  store float %ext, float addrspace(1)* %out
272  ret void
273}
274
; double15_extelt: stores the element of a constant <15 x double> selected by
; the runtime index %sel. The checks expect m0 to be written from an SGPR and
; two v_movrels_b32 reads, from VGPR number BASE and BASE+1, whose results
; form the register pair of the two-dword store. Neither s_or_b32 nor any
; instruction containing buffer_ may appear.
275; GCN-LABEL: {{^}}double15_extelt:
276; GCN-NOT: buffer_
277; GCN-NOT: s_or_b32
278; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
279; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
280; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
281; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
282; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
283; GCN:     store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
284define amdgpu_kernel void @double15_extelt(double addrspace(1)* %out, i32 %sel) {
285entry:
286  %ext = extractelement <15 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>, i32 %sel
287  store double %ext, double addrspace(1)* %out
288  ret void
289}
290
; double16_extelt: stores the element of a constant <16 x double> selected by
; the runtime index %sel. The checks expect m0 to be written from an SGPR and
; two v_movrels_b32 reads, from VGPR number BASE and BASE+1, whose results
; form the register pair of the two-dword store. Neither s_or_b32 nor any
; instruction containing buffer_ may appear.
291; GCN-LABEL: {{^}}double16_extelt:
292; GCN-NOT: buffer_
293; GCN-NOT: s_or_b32
294; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
295; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
296; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
297; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
298; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
299; GCN:     store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]]
300define amdgpu_kernel void @double16_extelt(double addrspace(1)* %out, i32 %sel) {
301entry:
302  %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
303  store double %ext, double addrspace(1)* %out
304  ret void
305}
306
; float32_extelt: stores the element of a constant <32 x float> selected by
; the runtime index %sel. The checks expect a write to m0, one v_mov_b32 per
; vector constant (1.0 through 32.0, non-inline values given as hex bit
; patterns), and a v_movrels_b32 whose source is the register holding 1.0 and
; whose result feeds the dword store. No instruction containing buffer_ may
; appear.
307; GCN-LABEL: {{^}}float32_extelt:
308; GCN-NOT: buffer_
309; GCN-DAG: s_mov_b32 m0,
310; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
311; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
312; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
313; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
314; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
315; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
316; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
317; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
318; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
319; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
320; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
321; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
322; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
323; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
324; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
325; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
326; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41880000
327; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
328; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41980000
329; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
330; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
331; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
332; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b80000
333; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c00000
334; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c80000
335; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d00000
336; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d80000
337; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e00000
338; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e80000
339; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f00000
340; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f80000
341; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x42000000
342; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
343; GCN:     store_dword v[{{[0-9:]+}}], [[RES]]
344define amdgpu_kernel void @float32_extelt(float addrspace(1)* %out, i32 %sel) {
345entry:
346  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
347  store float %ext, float addrspace(1)* %out
348  ret void
349}
350
; byte8_extelt: stores the element of a constant <8 x i8> selected by the
; runtime index %sel. The checks expect the eight byte constants packed into
; two 32-bit scalar moves (0x4030201 and 0x8070605), the index shifted left
; by 3, a 64-bit scalar right shift of the packed pair by that amount, and the
; low result register copied to a VGPR for the byte store. No instruction
; containing buffer_ may appear.
351; GCN-LABEL: {{^}}byte8_extelt:
352; GCN-NOT: buffer_
353; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201
354; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x8070605
355; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3
356; GCN:     s_lshr_b64 s{{\[}}[[RL:[0-9]+]]:{{[0-9]+}}], s{{\[}}[[SL]]:[[SH]]], [[SEL]]
357; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
358; GCN:     store_byte v[{{[0-9:]+}}], v[[VRL]]
359define amdgpu_kernel void @byte8_extelt(i8 addrspace(1)* %out, i32 %sel) {
360entry:
361  %ext = extractelement <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i32 %sel
362  store i8 %ext, i8 addrspace(1)* %out
363  ret void
364}
365
; byte16_extelt: stores the element of a constant <16 x i8> selected by the
; runtime index %sel. The checks expect scalar compares of the index against
; 1 through 15 and a chain of fifteen v_cndmask_b32 selects, where select N
; consumes the result of select N-1 and is conditioned on compare result N,
; ending in a byte store. No instruction containing buffer_ may appear.
366; GCN-LABEL: {{^}}byte16_extelt:
367; GCN-NOT: buffer_
368; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
369; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
370; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
371; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
372; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
373; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
374; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
375; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
376; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
377; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
378; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
379; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
380; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
381; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
382; GCN-DAG: s_cmp_lg_u32 [[IDX]], 8
383; GCN-DAG: s_cselect_b64 [[C8:[^,]+]], -1, 0
384; GCN-DAG: s_cmp_lg_u32 [[IDX]], 9
385; GCN-DAG: s_cselect_b64 [[C9:[^,]+]], -1, 0
386; GCN-DAG: s_cmp_lg_u32 [[IDX]], 10
387; GCN-DAG: s_cselect_b64 [[C10:[^,]+]], -1, 0
388; GCN-DAG: s_cmp_lg_u32 [[IDX]], 11
389; GCN-DAG: s_cselect_b64 [[C11:[^,]+]], -1, 0
390; GCN-DAG: s_cmp_lg_u32 [[IDX]], 12
391; GCN-DAG: s_cselect_b64 [[C12:[^,]+]], -1, 0
392; GCN-DAG: s_cmp_lg_u32 [[IDX]], 13
393; GCN-DAG: s_cselect_b64 [[C13:[^,]+]], -1, 0
394; GCN-DAG: s_cmp_lg_u32 [[IDX]], 14
395; GCN-DAG: s_cselect_b64 [[C14:[^,]+]], -1, 0
396; GCN-DAG: s_cmp_lg_u32 [[IDX]], 15
397; GCN-DAG: s_cselect_b64 [[C15:[^,]+]], -1, 0
398; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
399; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
400; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
401; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
402; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
403; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
404; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
405; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V8:v[0-9]+]], {{[^,]+}}, [[V7]], [[C8]]
406; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V9:v[0-9]+]], {{[^,]+}}, [[V8]], [[C9]]
407; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V10:v[0-9]+]], {{[^,]+}}, [[V9]], [[C10]]
408; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V11:v[0-9]+]], {{[^,]+}}, [[V10]], [[C11]]
409; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V12:v[0-9]+]], {{[^,]+}}, [[V11]], [[C12]]
410; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V13:v[0-9]+]], {{[^,]+}}, [[V12]], [[C13]]
411; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V14:v[0-9]+]], {{[^,]+}}, [[V13]], [[C14]]
412; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V15:v[0-9]+]], {{[^,]+}}, [[V14]], [[C15]]
413; GCN:     store_byte v[{{[0-9:]+}}], [[V15]]
414define amdgpu_kernel void @byte16_extelt(i8 addrspace(1)* %out, i32 %sel) {
415entry:
416  %ext = extractelement <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, i32 %sel
417  store i8 %ext, i8 addrspace(1)* %out
418  ret void
419}
420
; bit4_extelt: extracts the element of a constant <4 x i1> selected by the
; runtime index %sel, zero-extends it to i32 and stores it. Unlike the other
; tests in this file, buffer_ instructions are expected here: four
; buffer_store_byte of registers holding 0 and 1, then a buffer_load_ubyte
; whose result is masked with 1 before the flat dword store.
421; GCN-LABEL: {{^}}bit4_extelt:
422; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
423; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
424; GCN-DAG: buffer_store_byte [[ZERO]],
425; GCN-DAG: buffer_store_byte [[ONE]],
426; GCN-DAG: buffer_store_byte [[ZERO]],
427; GCN-DAG: buffer_store_byte [[ONE]],
428; GCN:     buffer_load_ubyte [[LOAD:v[0-9]+]],
429; GCN:     v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]]
430; GCN:     flat_store_dword v[{{[0-9:]+}}], [[RES]]
431define amdgpu_kernel void @bit4_extelt(i32 addrspace(1)* %out, i32 %sel) {
432entry:
433  %ext = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel
434  %zext = zext i1 %ext to i32
435  store i32 %zext, i32 addrspace(1)* %out
436  ret void
437}
438
439; GCN-LABEL: {{^}}bit128_extelt:
440; GCN-NOT: buffer_
441; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1
442; GCN: s_cmpk_lg_i32 {{s[0-9]+}}, 0x7f
443; GCN: s_cselect_b64 [[CL:[^,]+]], -1, 0
444; GCN: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, [[V1]], [[CL]]
445; GCN:     v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]]
446; GCN:     store_dword v[{{[0-9:]+}}], [[RES]]
447define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) {
448entry:
449  %ext = extractelement <128 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, i32 %sel
450  %zext = zext i1 %ext to i32
451  store i32 %zext, i32 addrspace(1)* %out
452  ret void
453}
454
; float32_extelt_vec: non-kernel function returning the element of a constant
; <32 x float> selected by the i32 argument %sel. The checks expect vector
; compares against v0: an equality compare with 1 selecting between 1.0 and
; 2.0, and a final not-equal compare with 31 selecting between 0x42000000 and
; an earlier result, written to v0. No instruction containing buffer_ may
; appear.
455; GCN-LABEL: {{^}}float32_extelt_vec:
456; GCN-NOT: buffer_
457; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0
458; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1.0, 2.0, [[CC1]]
459; GCN-DAG: v_mov_b32_e32 [[LASTVAL:v[0-9]+]], 0x42000000
460; GCN-DAG: v_cmp_ne_u32_e32 [[LASTCC:[^,]+]], 31, v0
461; GCN-DAG: v_cndmask_b32_e{{32|64}} v0, [[LASTVAL]], v{{[0-9]+}}, [[LASTCC]]
462define float @float32_extelt_vec(i32 %sel) {
463entry:
464  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
465  ret float %ext
466}
467
; double16_extelt_vec: non-kernel function returning the element of a constant
; <16 x double> selected by the i32 argument %sel. The checks expect four
; v_mov_b32 constants named as the high and low halves of the first two
; values, and one equality compare of v0 with 1 that drives a v_cndmask_b32
; for each half. No instruction containing buffer_ may appear.
468; GCN-LABEL: {{^}}double16_extelt_vec:
469; GCN-NOT: buffer_
470; GCN-DAG: v_mov_b32_e32 [[V1HI:v[0-9]+]], 0x3ff19999
471; GCN-DAG: v_mov_b32_e32 [[V1LO:v[0-9]+]], 0x9999999a
472; GCN-DAG: v_mov_b32_e32 [[V2HI:v[0-9]+]], 0x4000cccc
473; GCN-DAG: v_mov_b32_e32 [[V2LO:v[0-9]+]], 0xcccccccd
474; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0
475; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1HI:v[0-9]+]], [[V1HI]], [[V2HI]], [[CC1]]
476; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1LO:v[0-9]+]], [[V1LO]], [[V2LO]], [[CC1]]
477define double @double16_extelt_vec(i32 %sel) {
478entry:
479  %ext = extractelement <16 x double> <double 1.1, double 2.1, double 3.1, double 4.1, double 5.1, double 6.1, double 7.1, double 8.1, double 9.1, double 10.1, double 11.1, double 12.1, double 13.1, double 14.1, double 15.1, double 16.1>, i32 %sel
480  ret double %ext
481}
482