1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
7
; Dynamic extract of one i8 from a <4 x i8> that is loaded through an inreg
; addrspace(4) pointer, with the index %idx also passed inreg (amdgpu_ps).
; The expected code loads the vector as one dword with a scalar load, splits
; it with s_bfe/s_lshr/s_and and ORs the bytes back together, then masks the
; index with 3, shifts it left by 3 and shifts the packed value right by the
; result. Everything stays in scalar registers and the value is left in s0.
; The gfx900, fiji and hawaii runs share one expectation via the "GCN" prefix.
8define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
9; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
10; GCN:       ; %bb.0:
11; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
12; GCN-NEXT:    s_waitcnt lgkmcnt(0)
13; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
14; GCN-NEXT:    s_lshr_b32 s1, s0, 24
15; GCN-NEXT:    s_and_b32 s2, s0, 0xff
16; GCN-NEXT:    s_lshl_b32 s3, s3, 8
17; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
18; GCN-NEXT:    s_or_b32 s2, s2, s3
19; GCN-NEXT:    s_lshl_b32 s0, s0, 16
20; GCN-NEXT:    s_or_b32 s0, s2, s0
21; GCN-NEXT:    s_lshl_b32 s1, s1, 24
22; GCN-NEXT:    s_or_b32 s0, s0, s1
23; GCN-NEXT:    s_and_b32 s1, s4, 3
24; GCN-NEXT:    s_lshl_b32 s1, s1, 3
25; GCN-NEXT:    s_lshr_b32 s0, s0, s1
26; GCN-NEXT:    ; return to shader part epilog
27;
28; GFX10-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
29; GFX10:       ; %bb.0:
30; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
31; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
32; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80008
33; GFX10-NEXT:    s_lshr_b32 s1, s0, 24
34; GFX10-NEXT:    s_and_b32 s2, s0, 0xff
35; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x80010
36; GFX10-NEXT:    s_lshl_b32 s3, s3, 8
37; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
38; GFX10-NEXT:    s_or_b32 s2, s2, s3
39; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
40; GFX10-NEXT:    s_or_b32 s0, s2, s0
41; GFX10-NEXT:    s_and_b32 s2, s4, 3
42; GFX10-NEXT:    s_or_b32 s0, s0, s1
43; GFX10-NEXT:    s_lshl_b32 s1, s2, 3
44; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
45; GFX10-NEXT:    ; return to shader part epilog
46;
47; GFX11-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
48; GFX11:       ; %bb.0:
49; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
50; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
51; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80008
52; GFX11-NEXT:    s_lshr_b32 s1, s0, 24
53; GFX11-NEXT:    s_and_b32 s2, s0, 0xff
54; GFX11-NEXT:    s_bfe_u32 s0, s0, 0x80010
55; GFX11-NEXT:    s_lshl_b32 s3, s3, 8
56; GFX11-NEXT:    s_lshl_b32 s0, s0, 16
57; GFX11-NEXT:    s_or_b32 s2, s2, s3
58; GFX11-NEXT:    s_lshl_b32 s1, s1, 24
59; GFX11-NEXT:    s_or_b32 s0, s2, s0
60; GFX11-NEXT:    s_and_b32 s2, s4, 3
61; GFX11-NEXT:    s_or_b32 s0, s0, s1
62; GFX11-NEXT:    s_lshl_b32 s1, s2, 3
63; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
64; GFX11-NEXT:    s_lshr_b32 s0, s0, s1
65; GFX11-NEXT:    ; return to shader part epilog
; IR under test: load the whole vector, then extract the element selected by %idx.
66  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
67  %element = extractelement <4 x i8> %vector, i32 %idx
68  ret i8 %element
69}
70
; Dynamic extract of one i8 from a <4 x i8> loaded through a non-inreg
; addrspace(1) pointer, with the index %idx passed inreg (amdgpu_ps).
; The expected load differs per target: global_load on gfx900/gfx1010/gfx1100,
; flat_load on fiji and an addr64 buffer_load on hawaii, so each run has its
; own prefix here. In every variant the index is masked with 3 and shifted
; left by 3 with scalar instructions while the load is outstanding, the
; repacked dword is shifted right by that scalar amount with v_lshrrev_b32,
; and v_readfirstlane_b32 moves the result into s0.
71define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
72; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
73; GFX9:       ; %bb.0:
74; GFX9-NEXT:    global_load_dword v0, v[0:1], off
75; GFX9-NEXT:    v_mov_b32_e32 v2, 8
76; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
77; GFX9-NEXT:    v_mov_b32_e32 v3, 16
78; GFX9-NEXT:    s_and_b32 s0, s2, 3
79; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
80; GFX9-NEXT:    s_waitcnt vmcnt(0)
81; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
82; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
83; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
84; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
85; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
86; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
87; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
88; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
89; GFX9-NEXT:    ; return to shader part epilog
90;
91; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
92; GFX8:       ; %bb.0:
93; GFX8-NEXT:    flat_load_dword v0, v[0:1]
94; GFX8-NEXT:    v_mov_b32_e32 v1, 8
95; GFX8-NEXT:    v_mov_b32_e32 v2, 16
96; GFX8-NEXT:    s_and_b32 s0, s2, 3
97; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
98; GFX8-NEXT:    s_waitcnt vmcnt(0)
99; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
100; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
101; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
102; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
103; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
104; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
105; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
106; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
107; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
108; GFX8-NEXT:    ; return to shader part epilog
109;
110; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
111; GFX7:       ; %bb.0:
112; GFX7-NEXT:    s_mov_b32 s6, 0
113; GFX7-NEXT:    s_mov_b32 s7, 0xf000
114; GFX7-NEXT:    s_mov_b64 s[4:5], 0
115; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
116; GFX7-NEXT:    s_and_b32 s0, s2, 3
117; GFX7-NEXT:    s_lshl_b32 s0, s0, 3
118; GFX7-NEXT:    s_waitcnt vmcnt(0)
119; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
120; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
121; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
122; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
123; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
124; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
125; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
126; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
127; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
128; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
129; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
130; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
131; GFX7-NEXT:    ; return to shader part epilog
132;
133; GFX10-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
134; GFX10:       ; %bb.0:
135; GFX10-NEXT:    global_load_dword v0, v[0:1], off
136; GFX10-NEXT:    v_mov_b32_e32 v1, 8
137; GFX10-NEXT:    v_mov_b32_e32 v2, 16
138; GFX10-NEXT:    s_and_b32 s0, s2, 3
139; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
140; GFX10-NEXT:    s_waitcnt vmcnt(0)
141; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
142; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
143; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
144; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
145; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
146; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
147; GFX10-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
148; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
149; GFX10-NEXT:    ; return to shader part epilog
150;
151; GFX11-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
152; GFX11:       ; %bb.0:
153; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
154; GFX11-NEXT:    s_and_b32 s0, s2, 3
155; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
156; GFX11-NEXT:    s_lshl_b32 s0, s0, 3
157; GFX11-NEXT:    s_waitcnt vmcnt(0)
158; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
159; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
160; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
161; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
162; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
163; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
164; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
165; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
166; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
167; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
168; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
169; GFX11-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
170; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
171; GFX11-NEXT:    ; return to shader part epilog
; IR under test: load the whole vector, then extract the element selected by %idx.
172  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
173  %element = extractelement <4 x i8> %vector, i32 %idx
174  ret i8 %element
175}
176
; Dynamic extract of one i8 from a <4 x i8> loaded through an addrspace(1)
; pointer, with neither the pointer nor the index %idx marked inreg.
; Unlike the amdgpu_ps tests in this file, this one uses the default calling
; convention: the expected code starts with a full s_waitcnt and returns
; with s_setpc_b64 s[30:31].
; The index arrives in v2 and is masked with 3 and shifted left by 3 using
; vector instructions. The repacked dword is then shifted right by that
; per-lane amount and the result is left in v0.
177define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %idx) {
178; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
179; GFX9:       ; %bb.0:
180; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181; GFX9-NEXT:    global_load_dword v0, v[0:1], off
182; GFX9-NEXT:    v_mov_b32_e32 v3, 8
183; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
184; GFX9-NEXT:    v_mov_b32_e32 v4, 16
185; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
186; GFX9-NEXT:    s_waitcnt vmcnt(0)
187; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
188; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
189; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
190; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v3
191; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v5
192; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
193; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
194; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
195; GFX9-NEXT:    s_setpc_b64 s[30:31]
196;
197; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
198; GFX8:       ; %bb.0:
199; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200; GFX8-NEXT:    flat_load_dword v0, v[0:1]
201; GFX8-NEXT:    v_mov_b32_e32 v1, 8
202; GFX8-NEXT:    v_mov_b32_e32 v3, 16
203; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
204; GFX8-NEXT:    s_waitcnt vmcnt(0)
205; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
206; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
207; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
208; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
209; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
210; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
211; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
212; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
213; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
214; GFX8-NEXT:    s_setpc_b64 s[30:31]
215;
216; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
217; GFX7:       ; %bb.0:
218; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219; GFX7-NEXT:    s_mov_b32 s6, 0
220; GFX7-NEXT:    s_mov_b32 s7, 0xf000
221; GFX7-NEXT:    s_mov_b64 s[4:5], 0
222; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
223; GFX7-NEXT:    v_and_b32_e32 v1, 3, v2
224; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
225; GFX7-NEXT:    s_waitcnt vmcnt(0)
226; GFX7-NEXT:    v_bfe_u32 v4, v0, 8, 8
227; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
228; GFX7-NEXT:    v_and_b32_e32 v3, 0xff, v0
229; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
230; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
231; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
232; GFX7-NEXT:    v_or_b32_e32 v3, v3, v4
233; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
234; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
235; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
236; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
237; GFX7-NEXT:    s_setpc_b64 s[30:31]
238;
239; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
240; GFX10:       ; %bb.0:
241; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
243; GFX10-NEXT:    global_load_dword v0, v[0:1], off
244; GFX10-NEXT:    v_mov_b32_e32 v1, 8
245; GFX10-NEXT:    v_mov_b32_e32 v3, 16
246; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
247; GFX10-NEXT:    s_waitcnt vmcnt(0)
248; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
249; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
250; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
251; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
252; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
253; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
254; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
255; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
256; GFX10-NEXT:    s_setpc_b64 s[30:31]
257;
258; GFX11-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
259; GFX11:       ; %bb.0:
260; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
262; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
263; GFX11-NEXT:    s_waitcnt vmcnt(0)
264; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
265; GFX11-NEXT:    v_bfe_u32 v3, v0, 16, 8
266; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
267; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
268; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
269; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
270; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
271; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
272; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
273; GFX11-NEXT:    v_and_b32_e32 v1, 3, v2
274; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
275; GFX11-NEXT:    v_or3_b32 v0, v0, v3, v4
276; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
277; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
278; GFX11-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
279; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: load the whole vector, then extract the element selected by %idx.
280  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
281  %element = extractelement <4 x i8> %vector, i32 %idx
282  ret i8 %element
283}
284
; Dynamic extract of one i8 from a <4 x i8> loaded through an inreg
; addrspace(4) pointer, with a non-inreg index %idx (amdgpu_ps).
; The expected code loads and repacks the vector entirely with scalar
; instructions, while the index in v0 is masked with 3 and shifted left by 3
; with vector instructions. The final shift therefore mixes both register
; kinds: v_lshrrev_b32_e64 v0, v0, s0 on gfx900/fiji/gfx1010/gfx1100 and
; v_lshr_b32_e32 v0, s0, v0 on hawaii. v_readfirstlane_b32 then moves the
; result into s0.
285define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
286; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
287; GFX9:       ; %bb.0:
288; GFX9-NEXT:    s_load_dword s0, s[2:3], 0x0
289; GFX9-NEXT:    v_and_b32_e32 v0, 3, v0
290; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
291; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
292; GFX9-NEXT:    s_bfe_u32 s3, s0, 0x80008
293; GFX9-NEXT:    s_lshr_b32 s1, s0, 24
294; GFX9-NEXT:    s_and_b32 s2, s0, 0xff
295; GFX9-NEXT:    s_lshl_b32 s3, s3, 8
296; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x80010
297; GFX9-NEXT:    s_or_b32 s2, s2, s3
298; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
299; GFX9-NEXT:    s_or_b32 s0, s2, s0
300; GFX9-NEXT:    s_lshl_b32 s1, s1, 24
301; GFX9-NEXT:    s_or_b32 s0, s0, s1
302; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
303; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
304; GFX9-NEXT:    ; return to shader part epilog
305;
306; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
307; GFX8:       ; %bb.0:
308; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x0
309; GFX8-NEXT:    v_and_b32_e32 v0, 3, v0
310; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
311; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
312; GFX8-NEXT:    s_bfe_u32 s3, s0, 0x80008
313; GFX8-NEXT:    s_lshr_b32 s1, s0, 24
314; GFX8-NEXT:    s_and_b32 s2, s0, 0xff
315; GFX8-NEXT:    s_lshl_b32 s3, s3, 8
316; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x80010
317; GFX8-NEXT:    s_or_b32 s2, s2, s3
318; GFX8-NEXT:    s_lshl_b32 s0, s0, 16
319; GFX8-NEXT:    s_or_b32 s0, s2, s0
320; GFX8-NEXT:    s_lshl_b32 s1, s1, 24
321; GFX8-NEXT:    s_or_b32 s0, s0, s1
322; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
323; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
324; GFX8-NEXT:    ; return to shader part epilog
325;
326; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
327; GFX7:       ; %bb.0:
328; GFX7-NEXT:    s_load_dword s0, s[2:3], 0x0
329; GFX7-NEXT:    v_and_b32_e32 v0, 3, v0
330; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
331; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
332; GFX7-NEXT:    s_bfe_u32 s3, s0, 0x80008
333; GFX7-NEXT:    s_lshr_b32 s1, s0, 24
334; GFX7-NEXT:    s_and_b32 s2, s0, 0xff
335; GFX7-NEXT:    s_lshl_b32 s3, s3, 8
336; GFX7-NEXT:    s_bfe_u32 s0, s0, 0x80010
337; GFX7-NEXT:    s_or_b32 s2, s2, s3
338; GFX7-NEXT:    s_lshl_b32 s0, s0, 16
339; GFX7-NEXT:    s_or_b32 s0, s2, s0
340; GFX7-NEXT:    s_lshl_b32 s1, s1, 24
341; GFX7-NEXT:    s_or_b32 s0, s0, s1
342; GFX7-NEXT:    v_lshr_b32_e32 v0, s0, v0
343; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
344; GFX7-NEXT:    ; return to shader part epilog
345;
346; GFX10-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
347; GFX10:       ; %bb.0:
348; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
349; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
350; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
351; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
352; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
353; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
354; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
355; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
356; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
357; GFX10-NEXT:    s_or_b32 s1, s1, s2
358; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
359; GFX10-NEXT:    s_or_b32 s1, s1, s3
360; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
361; GFX10-NEXT:    s_or_b32 s0, s1, s0
362; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
363; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
364; GFX10-NEXT:    ; return to shader part epilog
365;
366; GFX11-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
367; GFX11:       ; %bb.0:
368; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
369; GFX11-NEXT:    v_and_b32_e32 v0, 3, v0
370; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
371; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
372; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
373; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
374; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
375; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
376; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
377; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
378; GFX11-NEXT:    s_or_b32 s1, s1, s2
379; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
380; GFX11-NEXT:    s_or_b32 s1, s1, s3
381; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
382; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
383; GFX11-NEXT:    s_or_b32 s0, s1, s0
384; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, s0
385; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
386; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
387; GFX11-NEXT:    ; return to shader part epilog
; IR under test: load the whole vector, then extract the element selected by %idx.
388  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
389  %element = extractelement <4 x i8> %vector, i32 %idx
390  ret i8 %element
391}
392
; Constant-index extract: element 0 of a <4 x i8> loaded through an inreg
; addrspace(4) pointer (amdgpu_ps).
; The expected code loads the vector as one dword with a scalar load and
; repacks its four bytes into s0. No trailing shift is expected for index 0,
; so the sequence ends right after the last s_or_b32.
; The gfx900, fiji and hawaii runs share one expectation via the "GCN" prefix.
393define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg %ptr) {
394; GCN-LABEL: extractelement_sgpr_v4i8_idx0:
395; GCN:       ; %bb.0:
396; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
397; GCN-NEXT:    s_waitcnt lgkmcnt(0)
398; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
399; GCN-NEXT:    s_lshr_b32 s1, s0, 24
400; GCN-NEXT:    s_and_b32 s2, s0, 0xff
401; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
402; GCN-NEXT:    s_lshl_b32 s3, s3, 8
403; GCN-NEXT:    s_or_b32 s2, s2, s3
404; GCN-NEXT:    s_lshl_b32 s0, s0, 16
405; GCN-NEXT:    s_or_b32 s0, s2, s0
406; GCN-NEXT:    s_lshl_b32 s1, s1, 24
407; GCN-NEXT:    s_or_b32 s0, s0, s1
408; GCN-NEXT:    ; return to shader part epilog
409;
410; GFX10-LABEL: extractelement_sgpr_v4i8_idx0:
411; GFX10:       ; %bb.0:
412; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
413; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
414; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
415; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
416; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
417; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
418; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
419; GFX10-NEXT:    s_or_b32 s1, s1, s2
420; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
421; GFX10-NEXT:    s_or_b32 s1, s1, s3
422; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
423; GFX10-NEXT:    s_or_b32 s0, s1, s0
424; GFX10-NEXT:    ; return to shader part epilog
425;
426; GFX11-LABEL: extractelement_sgpr_v4i8_idx0:
427; GFX11:       ; %bb.0:
428; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
429; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
430; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
431; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
432; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
433; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
434; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
435; GFX11-NEXT:    s_or_b32 s1, s1, s2
436; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
437; GFX11-NEXT:    s_or_b32 s1, s1, s3
438; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
439; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
440; GFX11-NEXT:    s_or_b32 s0, s1, s0
441; GFX11-NEXT:    ; return to shader part epilog
; IR under test: load the whole vector, then extract element 0.
442  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
443  %element = extractelement <4 x i8> %vector, i32 0
444  ret i8 %element
445}
446
; Constant-index extract: element 1 of a <4 x i8> loaded through an inreg
; addrspace(4) pointer (amdgpu_ps).
; The expected code matches the index-0 variant of this test (scalar dword
; load, then byte repack into s0) plus one trailing s_lshr_b32 by 8, i.e.
; index 1 times 8 bits.
; The gfx900, fiji and hawaii runs share one expectation via the "GCN" prefix.
447define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg %ptr) {
448; GCN-LABEL: extractelement_sgpr_v4i8_idx1:
449; GCN:       ; %bb.0:
450; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
451; GCN-NEXT:    s_waitcnt lgkmcnt(0)
452; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
453; GCN-NEXT:    s_lshr_b32 s1, s0, 24
454; GCN-NEXT:    s_and_b32 s2, s0, 0xff
455; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
456; GCN-NEXT:    s_lshl_b32 s3, s3, 8
457; GCN-NEXT:    s_or_b32 s2, s2, s3
458; GCN-NEXT:    s_lshl_b32 s0, s0, 16
459; GCN-NEXT:    s_or_b32 s0, s2, s0
460; GCN-NEXT:    s_lshl_b32 s1, s1, 24
461; GCN-NEXT:    s_or_b32 s0, s0, s1
462; GCN-NEXT:    s_lshr_b32 s0, s0, 8
463; GCN-NEXT:    ; return to shader part epilog
464;
465; GFX10-LABEL: extractelement_sgpr_v4i8_idx1:
466; GFX10:       ; %bb.0:
467; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
468; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
469; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
470; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
471; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
472; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
473; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
474; GFX10-NEXT:    s_or_b32 s1, s1, s2
475; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
476; GFX10-NEXT:    s_or_b32 s1, s1, s3
477; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
478; GFX10-NEXT:    s_or_b32 s0, s1, s0
479; GFX10-NEXT:    s_lshr_b32 s0, s0, 8
480; GFX10-NEXT:    ; return to shader part epilog
481;
482; GFX11-LABEL: extractelement_sgpr_v4i8_idx1:
483; GFX11:       ; %bb.0:
484; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
485; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
486; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
487; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
488; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
489; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
490; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
491; GFX11-NEXT:    s_or_b32 s1, s1, s2
492; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
493; GFX11-NEXT:    s_or_b32 s1, s1, s3
494; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
495; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
496; GFX11-NEXT:    s_or_b32 s0, s1, s0
497; GFX11-NEXT:    s_lshr_b32 s0, s0, 8
498; GFX11-NEXT:    ; return to shader part epilog
; IR under test: load the whole vector, then extract element 1.
499  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
500  %element = extractelement <4 x i8> %vector, i32 1
501  ret i8 %element
502}
503
; Constant-index extract: element 2 of a <4 x i8> loaded through an inreg
; addrspace(4) pointer (amdgpu_ps).
; The expected code is a scalar dword load and byte repack into s0, followed
; by one trailing s_lshr_b32 by 16, i.e. index 2 times 8 bits.
; The gfx900, fiji and hawaii runs share one expectation via the "GCN" prefix.
504define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg %ptr) {
505; GCN-LABEL: extractelement_sgpr_v4i8_idx2:
506; GCN:       ; %bb.0:
507; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
508; GCN-NEXT:    s_waitcnt lgkmcnt(0)
509; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
510; GCN-NEXT:    s_lshr_b32 s1, s0, 24
511; GCN-NEXT:    s_and_b32 s2, s0, 0xff
512; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
513; GCN-NEXT:    s_lshl_b32 s3, s3, 8
514; GCN-NEXT:    s_or_b32 s2, s2, s3
515; GCN-NEXT:    s_lshl_b32 s0, s0, 16
516; GCN-NEXT:    s_or_b32 s0, s2, s0
517; GCN-NEXT:    s_lshl_b32 s1, s1, 24
518; GCN-NEXT:    s_or_b32 s0, s0, s1
519; GCN-NEXT:    s_lshr_b32 s0, s0, 16
520; GCN-NEXT:    ; return to shader part epilog
521;
522; GFX10-LABEL: extractelement_sgpr_v4i8_idx2:
523; GFX10:       ; %bb.0:
524; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
525; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
526; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
527; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
528; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
529; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
530; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
531; GFX10-NEXT:    s_or_b32 s1, s1, s2
532; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
533; GFX10-NEXT:    s_or_b32 s1, s1, s3
534; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
535; GFX10-NEXT:    s_or_b32 s0, s1, s0
536; GFX10-NEXT:    s_lshr_b32 s0, s0, 16
537; GFX10-NEXT:    ; return to shader part epilog
538;
539; GFX11-LABEL: extractelement_sgpr_v4i8_idx2:
540; GFX11:       ; %bb.0:
541; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
542; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
543; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
544; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
545; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
546; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
547; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
548; GFX11-NEXT:    s_or_b32 s1, s1, s2
549; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
550; GFX11-NEXT:    s_or_b32 s1, s1, s3
551; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
552; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
553; GFX11-NEXT:    s_or_b32 s0, s1, s0
554; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
555; GFX11-NEXT:    ; return to shader part epilog
; IR under test: load the whole vector, then extract element 2.
556  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
557  %element = extractelement <4 x i8> %vector, i32 2
558  ret i8 %element
559}
560
; Constant-index extract: element 3 of a <4 x i8> loaded through an inreg
; addrspace(4) pointer (amdgpu_ps).
; The expected code is a scalar dword load and byte repack into s0, followed
; by one trailing s_lshr_b32 by 24, i.e. index 3 times 8 bits.
; The gfx900, fiji and hawaii runs share one expectation via the "GCN" prefix.
561define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg %ptr) {
562; GCN-LABEL: extractelement_sgpr_v4i8_idx3:
563; GCN:       ; %bb.0:
564; GCN-NEXT:    s_load_dword s0, s[2:3], 0x0
565; GCN-NEXT:    s_waitcnt lgkmcnt(0)
566; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
567; GCN-NEXT:    s_lshr_b32 s1, s0, 24
568; GCN-NEXT:    s_and_b32 s2, s0, 0xff
569; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
570; GCN-NEXT:    s_lshl_b32 s3, s3, 8
571; GCN-NEXT:    s_or_b32 s2, s2, s3
572; GCN-NEXT:    s_lshl_b32 s0, s0, 16
573; GCN-NEXT:    s_or_b32 s0, s2, s0
574; GCN-NEXT:    s_lshl_b32 s1, s1, 24
575; GCN-NEXT:    s_or_b32 s0, s0, s1
576; GCN-NEXT:    s_lshr_b32 s0, s0, 24
577; GCN-NEXT:    ; return to shader part epilog
578;
579; GFX10-LABEL: extractelement_sgpr_v4i8_idx3:
580; GFX10:       ; %bb.0:
581; GFX10-NEXT:    s_load_dword s0, s[2:3], 0x0
582; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
583; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
584; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
585; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
586; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
587; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
588; GFX10-NEXT:    s_or_b32 s1, s1, s2
589; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
590; GFX10-NEXT:    s_or_b32 s1, s1, s3
591; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
592; GFX10-NEXT:    s_or_b32 s0, s1, s0
593; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
594; GFX10-NEXT:    ; return to shader part epilog
595;
596; GFX11-LABEL: extractelement_sgpr_v4i8_idx3:
597; GFX11:       ; %bb.0:
598; GFX11-NEXT:    s_load_b32 s0, s[2:3], 0x0
599; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
600; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
601; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
602; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
603; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
604; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
605; GFX11-NEXT:    s_or_b32 s1, s1, s2
606; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
607; GFX11-NEXT:    s_or_b32 s1, s1, s3
608; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
609; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
610; GFX11-NEXT:    s_or_b32 s0, s1, s0
611; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
612; GFX11-NEXT:    ; return to shader part epilog
; IR under test: load the whole vector, then extract element 3.
613  %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr
614  %element = extractelement <4 x i8> %vector, i32 3
615  ret i8 %element
616}
617
; Constant-index extract: element 0 of a <4 x i8> loaded through a non-inreg
; addrspace(1) pointer, using the default calling convention (the expected
; code returns with s_setpc_b64 s[30:31]).
; The expected load differs per target: global_load on gfx900/gfx1010/gfx1100,
; flat_load on fiji and an addr64 buffer_load on hawaii. In every variant the
; loaded dword is split into bytes and repacked into v0 with vector
; instructions, and no trailing shift is expected for index 0.
618define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) {
619; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
620; GFX9:       ; %bb.0:
621; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
622; GFX9-NEXT:    global_load_dword v0, v[0:1], off
623; GFX9-NEXT:    v_mov_b32_e32 v2, 8
624; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
625; GFX9-NEXT:    v_mov_b32_e32 v3, 16
626; GFX9-NEXT:    s_waitcnt vmcnt(0)
627; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
628; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
629; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
630; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
631; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
632; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
633; GFX9-NEXT:    s_setpc_b64 s[30:31]
634;
635; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
636; GFX8:       ; %bb.0:
637; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
638; GFX8-NEXT:    flat_load_dword v0, v[0:1]
639; GFX8-NEXT:    v_mov_b32_e32 v1, 8
640; GFX8-NEXT:    v_mov_b32_e32 v2, 16
641; GFX8-NEXT:    s_waitcnt vmcnt(0)
642; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
643; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
644; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
645; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
646; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
647; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
648; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
649; GFX8-NEXT:    s_setpc_b64 s[30:31]
650;
651; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
652; GFX7:       ; %bb.0:
653; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
654; GFX7-NEXT:    s_mov_b32 s6, 0
655; GFX7-NEXT:    s_mov_b32 s7, 0xf000
656; GFX7-NEXT:    s_mov_b64 s[4:5], 0
657; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
658; GFX7-NEXT:    s_waitcnt vmcnt(0)
659; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
660; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
661; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
662; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
663; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
664; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
665; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
666; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
667; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
668; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
669; GFX7-NEXT:    s_setpc_b64 s[30:31]
670;
671; GFX10-LABEL: extractelement_vgpr_v4i8_idx0:
672; GFX10:       ; %bb.0:
673; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
675; GFX10-NEXT:    global_load_dword v0, v[0:1], off
676; GFX10-NEXT:    v_mov_b32_e32 v1, 8
677; GFX10-NEXT:    v_mov_b32_e32 v2, 16
678; GFX10-NEXT:    s_waitcnt vmcnt(0)
679; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
680; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
681; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
682; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
683; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
684; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
685; GFX10-NEXT:    s_setpc_b64 s[30:31]
686;
687; GFX11-LABEL: extractelement_vgpr_v4i8_idx0:
688; GFX11:       ; %bb.0:
689; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
690; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
691; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
692; GFX11-NEXT:    s_waitcnt vmcnt(0)
693; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
694; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
695; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
696; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
697; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
698; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
699; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
700; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
701; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
702; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
703; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
704; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: load the whole vector, then extract element 0.
705  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
706  %element = extractelement <4 x i8> %vector, i32 0
707  ret i8 %element
708}
709
; Constant-index case: loads a <4 x i8> vector from the addrspace(1) pointer
; %ptr and returns element 1. In the checked output for every target the four
; bytes are repacked into one 32-bit register, which is then shifted right by
; 8 bits.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
710define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) {
711; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
712; GFX9:       ; %bb.0:
713; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
714; GFX9-NEXT:    global_load_dword v0, v[0:1], off
715; GFX9-NEXT:    s_mov_b32 s4, 8
716; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
717; GFX9-NEXT:    v_mov_b32_e32 v2, 16
718; GFX9-NEXT:    s_waitcnt vmcnt(0)
719; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
720; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
721; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
722; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v4
723; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
724; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
725; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
726; GFX9-NEXT:    s_setpc_b64 s[30:31]
727;
728; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
729; GFX8:       ; %bb.0:
730; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
731; GFX8-NEXT:    flat_load_dword v0, v[0:1]
732; GFX8-NEXT:    v_mov_b32_e32 v1, 8
733; GFX8-NEXT:    v_mov_b32_e32 v2, 16
734; GFX8-NEXT:    s_waitcnt vmcnt(0)
735; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
736; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
737; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
738; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
739; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
740; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
741; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
742; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
743; GFX8-NEXT:    s_setpc_b64 s[30:31]
744;
745; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
746; GFX7:       ; %bb.0:
747; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
748; GFX7-NEXT:    s_mov_b32 s6, 0
749; GFX7-NEXT:    s_mov_b32 s7, 0xf000
750; GFX7-NEXT:    s_mov_b64 s[4:5], 0
751; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
752; GFX7-NEXT:    s_waitcnt vmcnt(0)
753; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
754; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
755; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
756; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
757; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
758; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
759; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
760; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
761; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
762; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
763; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
764; GFX7-NEXT:    s_setpc_b64 s[30:31]
765;
766; GFX10-LABEL: extractelement_vgpr_v4i8_idx1:
767; GFX10:       ; %bb.0:
768; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
770; GFX10-NEXT:    global_load_dword v0, v[0:1], off
771; GFX10-NEXT:    s_mov_b32 s4, 8
772; GFX10-NEXT:    v_mov_b32_e32 v1, 16
773; GFX10-NEXT:    s_waitcnt vmcnt(0)
774; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
775; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
776; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
777; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v2
778; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
779; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
780; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
781; GFX10-NEXT:    s_setpc_b64 s[30:31]
782;
783; GFX11-LABEL: extractelement_vgpr_v4i8_idx1:
784; GFX11:       ; %bb.0:
785; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
786; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
787; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
788; GFX11-NEXT:    s_waitcnt vmcnt(0)
789; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
790; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
791; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
792; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
793; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
794; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
795; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
796; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
797; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
798; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
799; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
800; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
801; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Test input: load the whole vector, then extract lane 1 with a constant index.
802  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
803  %element = extractelement <4 x i8> %vector, i32 1
804  ret i8 %element
805}
806
; Constant-index case: loads a <4 x i8> vector from the addrspace(1) pointer
; %ptr and returns element 2. In the checked output for every target the four
; bytes are repacked into one 32-bit register, which is then shifted right by
; 16 bits.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
807define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) {
808; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
809; GFX9:       ; %bb.0:
810; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
811; GFX9-NEXT:    global_load_dword v0, v[0:1], off
812; GFX9-NEXT:    v_mov_b32_e32 v2, 8
813; GFX9-NEXT:    s_mov_b32 s4, 16
814; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
815; GFX9-NEXT:    s_waitcnt vmcnt(0)
816; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
817; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
818; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
819; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
820; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
821; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
822; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
823; GFX9-NEXT:    s_setpc_b64 s[30:31]
824;
825; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
826; GFX8:       ; %bb.0:
827; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
828; GFX8-NEXT:    flat_load_dword v0, v[0:1]
829; GFX8-NEXT:    v_mov_b32_e32 v1, 8
830; GFX8-NEXT:    v_mov_b32_e32 v2, 16
831; GFX8-NEXT:    s_waitcnt vmcnt(0)
832; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
833; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
834; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
835; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
836; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
837; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
838; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
839; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
840; GFX8-NEXT:    s_setpc_b64 s[30:31]
841;
842; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
843; GFX7:       ; %bb.0:
844; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
845; GFX7-NEXT:    s_mov_b32 s6, 0
846; GFX7-NEXT:    s_mov_b32 s7, 0xf000
847; GFX7-NEXT:    s_mov_b64 s[4:5], 0
848; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
849; GFX7-NEXT:    s_waitcnt vmcnt(0)
850; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
851; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
852; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
853; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
854; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
855; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
856; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
857; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
858; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
859; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
860; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
861; GFX7-NEXT:    s_setpc_b64 s[30:31]
862;
863; GFX10-LABEL: extractelement_vgpr_v4i8_idx2:
864; GFX10:       ; %bb.0:
865; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
866; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
867; GFX10-NEXT:    global_load_dword v0, v[0:1], off
868; GFX10-NEXT:    v_mov_b32_e32 v1, 8
869; GFX10-NEXT:    s_mov_b32 s4, 16
870; GFX10-NEXT:    s_waitcnt vmcnt(0)
871; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
872; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
873; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
874; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
875; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
876; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
877; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
878; GFX10-NEXT:    s_setpc_b64 s[30:31]
879;
880; GFX11-LABEL: extractelement_vgpr_v4i8_idx2:
881; GFX11:       ; %bb.0:
882; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
883; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
884; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
885; GFX11-NEXT:    s_waitcnt vmcnt(0)
886; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
887; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
888; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
889; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
890; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
891; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
892; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
893; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
894; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
895; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
896; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
897; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
898; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Test input: load the whole vector, then extract lane 2 with a constant index.
899  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
900  %element = extractelement <4 x i8> %vector, i32 2
901  ret i8 %element
902}
903
; Constant-index case: loads a <4 x i8> vector from the addrspace(1) pointer
; %ptr and returns element 3 (the last lane). In the checked output for every
; target the four bytes are repacked into one 32-bit register, which is then
; shifted right by 24 bits.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
904define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) {
905; GFX9-LABEL: extractelement_vgpr_v4i8_idx3:
906; GFX9:       ; %bb.0:
907; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
908; GFX9-NEXT:    global_load_dword v0, v[0:1], off
909; GFX9-NEXT:    v_mov_b32_e32 v2, 8
910; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
911; GFX9-NEXT:    v_mov_b32_e32 v3, 16
912; GFX9-NEXT:    s_waitcnt vmcnt(0)
913; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
914; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
915; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
916; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
917; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
918; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
919; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
920; GFX9-NEXT:    s_setpc_b64 s[30:31]
921;
922; GFX8-LABEL: extractelement_vgpr_v4i8_idx3:
923; GFX8:       ; %bb.0:
924; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
925; GFX8-NEXT:    flat_load_dword v0, v[0:1]
926; GFX8-NEXT:    v_mov_b32_e32 v1, 8
927; GFX8-NEXT:    v_mov_b32_e32 v2, 16
928; GFX8-NEXT:    s_waitcnt vmcnt(0)
929; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
930; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
931; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
932; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
933; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
934; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
935; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
936; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
937; GFX8-NEXT:    s_setpc_b64 s[30:31]
938;
939; GFX7-LABEL: extractelement_vgpr_v4i8_idx3:
940; GFX7:       ; %bb.0:
941; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
942; GFX7-NEXT:    s_mov_b32 s6, 0
943; GFX7-NEXT:    s_mov_b32 s7, 0xf000
944; GFX7-NEXT:    s_mov_b64 s[4:5], 0
945; GFX7-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
946; GFX7-NEXT:    s_waitcnt vmcnt(0)
947; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
948; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
949; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
950; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
951; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
952; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
953; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
954; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
955; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
956; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
957; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
958; GFX7-NEXT:    s_setpc_b64 s[30:31]
959;
960; GFX10-LABEL: extractelement_vgpr_v4i8_idx3:
961; GFX10:       ; %bb.0:
962; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
963; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
964; GFX10-NEXT:    global_load_dword v0, v[0:1], off
965; GFX10-NEXT:    v_mov_b32_e32 v1, 8
966; GFX10-NEXT:    v_mov_b32_e32 v2, 16
967; GFX10-NEXT:    s_waitcnt vmcnt(0)
968; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
969; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
970; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
971; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
972; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
973; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
974; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
975; GFX10-NEXT:    s_setpc_b64 s[30:31]
976;
977; GFX11-LABEL: extractelement_vgpr_v4i8_idx3:
978; GFX11:       ; %bb.0:
979; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
980; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
981; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
982; GFX11-NEXT:    s_waitcnt vmcnt(0)
983; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
984; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
985; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
986; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
987; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
988; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
989; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
990; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
991; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
992; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
993; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
994; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
995; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Test input: load the whole vector, then extract lane 3 with a constant index.
996  %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
997  %element = extractelement <4 x i8> %vector, i32 3
998  ret i8 %element
999}
1000
; Dynamic-index case for an amdgpu_ps entry point: both the addrspace(4)
; pointer %ptr and the index %idx are marked inreg. Loads a <8 x i8> vector and
; returns the element selected by %idx.
; In the checked output the 64-bit load is done with a scalar load, each of the
; two loaded dwords is repacked from its bytes, the dword is chosen by comparing
; (idx >> 2) with 1, and the chosen dword is shifted right by (idx & 3) << 3
; bits.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
1001define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
1002; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
1003; GCN:       ; %bb.0:
1004; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1005; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1006; GCN-NEXT:    s_bfe_u32 s6, s0, 0x80008
1007; GCN-NEXT:    s_lshr_b32 s2, s0, 24
1008; GCN-NEXT:    s_and_b32 s5, s0, 0xff
1009; GCN-NEXT:    s_lshl_b32 s6, s6, 8
1010; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
1011; GCN-NEXT:    s_or_b32 s5, s5, s6
1012; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1013; GCN-NEXT:    s_or_b32 s0, s5, s0
1014; GCN-NEXT:    s_lshl_b32 s2, s2, 24
1015; GCN-NEXT:    s_bfe_u32 s5, s1, 0x80008
1016; GCN-NEXT:    s_lshr_b32 s3, s1, 24
1017; GCN-NEXT:    s_or_b32 s0, s0, s2
1018; GCN-NEXT:    s_and_b32 s2, s1, 0xff
1019; GCN-NEXT:    s_lshl_b32 s5, s5, 8
1020; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
1021; GCN-NEXT:    s_or_b32 s2, s2, s5
1022; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1023; GCN-NEXT:    s_or_b32 s1, s2, s1
1024; GCN-NEXT:    s_lshl_b32 s2, s3, 24
1025; GCN-NEXT:    s_or_b32 s1, s1, s2
1026; GCN-NEXT:    s_lshr_b32 s2, s4, 2
1027; GCN-NEXT:    s_cmp_eq_u32 s2, 1
1028; GCN-NEXT:    s_cselect_b32 s0, s1, s0
1029; GCN-NEXT:    s_and_b32 s1, s4, 3
1030; GCN-NEXT:    s_lshl_b32 s1, s1, 3
1031; GCN-NEXT:    s_lshr_b32 s0, s0, s1
1032; GCN-NEXT:    ; return to shader part epilog
1033;
1034; GFX10-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
1035; GFX10:       ; %bb.0:
1036; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1037; GFX10-NEXT:    s_lshr_b32 s2, s4, 2
1038; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1039; GFX10-NEXT:    s_bfe_u32 s7, s0, 0x80008
1040; GFX10-NEXT:    s_bfe_u32 s9, s1, 0x80008
1041; GFX10-NEXT:    s_lshr_b32 s3, s0, 24
1042; GFX10-NEXT:    s_lshr_b32 s5, s1, 24
1043; GFX10-NEXT:    s_and_b32 s6, s0, 0xff
1044; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x80010
1045; GFX10-NEXT:    s_and_b32 s8, s1, 0xff
1046; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x80010
1047; GFX10-NEXT:    s_lshl_b32 s7, s7, 8
1048; GFX10-NEXT:    s_lshl_b32 s9, s9, 8
1049; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
1050; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
1051; GFX10-NEXT:    s_or_b32 s6, s6, s7
1052; GFX10-NEXT:    s_or_b32 s7, s8, s9
1053; GFX10-NEXT:    s_lshl_b32 s3, s3, 24
1054; GFX10-NEXT:    s_lshl_b32 s5, s5, 24
1055; GFX10-NEXT:    s_or_b32 s0, s6, s0
1056; GFX10-NEXT:    s_or_b32 s1, s7, s1
1057; GFX10-NEXT:    s_or_b32 s0, s0, s3
1058; GFX10-NEXT:    s_or_b32 s1, s1, s5
1059; GFX10-NEXT:    s_cmp_eq_u32 s2, 1
1060; GFX10-NEXT:    s_cselect_b32 s0, s1, s0
1061; GFX10-NEXT:    s_and_b32 s1, s4, 3
1062; GFX10-NEXT:    s_lshl_b32 s1, s1, 3
1063; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
1064; GFX10-NEXT:    ; return to shader part epilog
1065;
1066; GFX11-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
1067; GFX11:       ; %bb.0:
1068; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1069; GFX11-NEXT:    s_lshr_b32 s2, s4, 2
1070; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1071; GFX11-NEXT:    s_bfe_u32 s7, s0, 0x80008
1072; GFX11-NEXT:    s_bfe_u32 s9, s1, 0x80008
1073; GFX11-NEXT:    s_lshr_b32 s3, s0, 24
1074; GFX11-NEXT:    s_lshr_b32 s5, s1, 24
1075; GFX11-NEXT:    s_and_b32 s6, s0, 0xff
1076; GFX11-NEXT:    s_bfe_u32 s0, s0, 0x80010
1077; GFX11-NEXT:    s_and_b32 s8, s1, 0xff
1078; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x80010
1079; GFX11-NEXT:    s_lshl_b32 s7, s7, 8
1080; GFX11-NEXT:    s_lshl_b32 s9, s9, 8
1081; GFX11-NEXT:    s_lshl_b32 s0, s0, 16
1082; GFX11-NEXT:    s_lshl_b32 s1, s1, 16
1083; GFX11-NEXT:    s_or_b32 s6, s6, s7
1084; GFX11-NEXT:    s_or_b32 s7, s8, s9
1085; GFX11-NEXT:    s_lshl_b32 s3, s3, 24
1086; GFX11-NEXT:    s_lshl_b32 s5, s5, 24
1087; GFX11-NEXT:    s_or_b32 s0, s6, s0
1088; GFX11-NEXT:    s_or_b32 s1, s7, s1
1089; GFX11-NEXT:    s_or_b32 s0, s0, s3
1090; GFX11-NEXT:    s_or_b32 s1, s1, s5
1091; GFX11-NEXT:    s_cmp_eq_u32 s2, 1
1092; GFX11-NEXT:    s_cselect_b32 s0, s1, s0
1093; GFX11-NEXT:    s_and_b32 s1, s4, 3
1094; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1095; GFX11-NEXT:    s_lshl_b32 s1, s1, 3
1096; GFX11-NEXT:    s_lshr_b32 s0, s0, s1
1097; GFX11-NEXT:    ; return to shader part epilog
; Test input: load the whole vector, then extract the lane named by the
; run-time value %idx.
1098  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1099  %element = extractelement <8 x i8> %vector, i32 %idx
1100  ret i8 %element
1101}
1102
; Dynamic-index case for an amdgpu_ps entry point where only the index %idx is
; marked inreg; the addrspace(1) pointer %ptr is not. Loads a <8 x i8> vector
; and returns the element selected by %idx.
; In the checked output the vector is loaded and repacked in vector registers,
; the dword is chosen with v_cmp_eq_u32 on (idx >> 2) against 1 followed by
; v_cndmask_b32, the chosen dword is shifted right by (idx & 3) << 3 bits, and
; the result is copied to s0 with v_readfirstlane_b32.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
1103define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
1104; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
1105; GFX9:       ; %bb.0:
1106; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1107; GFX9-NEXT:    s_mov_b32 s0, 8
1108; GFX9-NEXT:    s_mov_b32 s1, 16
1109; GFX9-NEXT:    s_movk_i32 s3, 0xff
1110; GFX9-NEXT:    s_lshr_b32 s4, s2, 2
1111; GFX9-NEXT:    s_and_b32 s2, s2, 3
1112; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s4, 1
1113; GFX9-NEXT:    s_waitcnt vmcnt(0)
1114; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
1115; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1116; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1117; GFX9-NEXT:    v_lshlrev_b32_sdwa v6, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1118; GFX9-NEXT:    v_lshlrev_b32_sdwa v5, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1119; GFX9-NEXT:    v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1120; GFX9-NEXT:    v_and_or_b32 v0, v0, s3, v4
1121; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1122; GFX9-NEXT:    v_and_or_b32 v1, v1, s3, v6
1123; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1124; GFX9-NEXT:    v_or3_b32 v0, v0, v5, v2
1125; GFX9-NEXT:    v_or3_b32 v1, v1, v7, v3
1126; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1127; GFX9-NEXT:    s_lshl_b32 s0, s2, 3
1128; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
1129; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1130; GFX9-NEXT:    ; return to shader part epilog
1131;
1132; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
1133; GFX8:       ; %bb.0:
1134; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1135; GFX8-NEXT:    v_mov_b32_e32 v2, 8
1136; GFX8-NEXT:    v_mov_b32_e32 v3, 16
1137; GFX8-NEXT:    s_lshr_b32 s0, s2, 2
1138; GFX8-NEXT:    s_and_b32 s1, s2, 3
1139; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
1140; GFX8-NEXT:    s_lshl_b32 s0, s1, 3
1141; GFX8-NEXT:    s_waitcnt vmcnt(0)
1142; GFX8-NEXT:    v_lshlrev_b32_sdwa v6, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1143; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1144; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1145; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
1146; GFX8-NEXT:    v_lshlrev_b32_sdwa v7, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1147; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1148; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1149; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1150; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
1151; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 24, v5
1152; GFX8-NEXT:    v_or_b32_e32 v0, v0, v7
1153; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
1154; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
1155; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
1156; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1157; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
1158; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1159; GFX8-NEXT:    ; return to shader part epilog
1160;
1161; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
1162; GFX7:       ; %bb.0:
1163; GFX7-NEXT:    s_mov_b32 s6, 0
1164; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1165; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1166; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1167; GFX7-NEXT:    s_lshr_b32 s0, s2, 2
1168; GFX7-NEXT:    s_and_b32 s1, s2, 3
1169; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
1170; GFX7-NEXT:    s_lshl_b32 s0, s1, 3
1171; GFX7-NEXT:    s_waitcnt vmcnt(0)
1172; GFX7-NEXT:    v_bfe_u32 v5, v0, 8, 8
1173; GFX7-NEXT:    v_bfe_u32 v7, v1, 8, 8
1174; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
1175; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
1176; GFX7-NEXT:    v_and_b32_e32 v4, 0xff, v0
1177; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
1178; GFX7-NEXT:    v_and_b32_e32 v6, 0xff, v1
1179; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
1180; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
1181; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 8, v7
1182; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1183; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1184; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
1185; GFX7-NEXT:    v_or_b32_e32 v5, v6, v7
1186; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1187; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1188; GFX7-NEXT:    v_or_b32_e32 v0, v4, v0
1189; GFX7-NEXT:    v_or_b32_e32 v1, v5, v1
1190; GFX7-NEXT:    v_or_b32_e32 v0, v0, v2
1191; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
1192; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1193; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
1194; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1195; GFX7-NEXT:    ; return to shader part epilog
1196;
1197; GFX10-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
1198; GFX10:       ; %bb.0:
1199; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1200; GFX10-NEXT:    s_mov_b32 s0, 8
1201; GFX10-NEXT:    s_mov_b32 s1, 16
1202; GFX10-NEXT:    s_waitcnt vmcnt(0)
1203; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
1204; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1205; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
1206; GFX10-NEXT:    v_lshlrev_b32_sdwa v5, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1207; GFX10-NEXT:    v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1208; GFX10-NEXT:    v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1209; GFX10-NEXT:    v_and_or_b32 v0, v0, 0xff, v3
1210; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1211; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xff, v5
1212; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
1213; GFX10-NEXT:    s_lshr_b32 s0, s2, 2
1214; GFX10-NEXT:    v_or3_b32 v0, v0, v6, v2
1215; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 1
1216; GFX10-NEXT:    v_or3_b32 v1, v1, v7, v3
1217; GFX10-NEXT:    s_and_b32 s0, s2, 3
1218; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
1219; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1220; GFX10-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
1221; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1222; GFX10-NEXT:    ; return to shader part epilog
1223;
1224; GFX11-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
1225; GFX11:       ; %bb.0:
1226; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
1227; GFX11-NEXT:    s_lshr_b32 s0, s2, 2
1228; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
1229; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 1
1230; GFX11-NEXT:    s_and_b32 s0, s2, 3
1231; GFX11-NEXT:    s_lshl_b32 s0, s0, 3
1232; GFX11-NEXT:    s_waitcnt vmcnt(0)
1233; GFX11-NEXT:    v_bfe_u32 v5, v1, 8, 8
1234; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 24, v1
1235; GFX11-NEXT:    v_bfe_u32 v7, v1, 16, 8
1236; GFX11-NEXT:    v_bfe_u32 v3, v0, 8, 8
1237; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
1238; GFX11-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
1239; GFX11-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
1240; GFX11-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
1241; GFX11-NEXT:    v_bfe_u32 v4, v0, 16, 8
1242; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1243; GFX11-NEXT:    v_and_or_b32 v1, v1, 0xff, v5
1244; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
1245; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
1246; GFX11-NEXT:    v_or3_b32 v1, v1, v7, v6
1247; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1248; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1249; GFX11-NEXT:    v_and_or_b32 v0, v0, 0xff, v3
1250; GFX11-NEXT:    v_or3_b32 v0, v0, v4, v2
1251; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1252; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1253; GFX11-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
1254; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1255; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1256; GFX11-NEXT:    ; return to shader part epilog
; Test input: load the whole vector, then extract the lane named by the
; run-time value %idx.
1257  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1258  %element = extractelement <8 x i8> %vector, i32 %idx
1259  ret i8 %element
1260}
1261
1262define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %idx) {
1263; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
1264; GFX9:       ; %bb.0:
1265; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1266; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1267; GFX9-NEXT:    s_mov_b32 s4, 8
1268; GFX9-NEXT:    s_mov_b32 s5, 16
1269; GFX9-NEXT:    s_movk_i32 s6, 0xff
1270; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 2, v2
1271; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
1272; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
1273; GFX9-NEXT:    s_waitcnt vmcnt(0)
1274; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1275; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
1276; GFX9-NEXT:    v_lshlrev_b32_sdwa v6, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1277; GFX9-NEXT:    v_lshlrev_b32_sdwa v8, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1278; GFX9-NEXT:    v_lshlrev_b32_sdwa v7, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1279; GFX9-NEXT:    v_lshlrev_b32_sdwa v9, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1280; GFX9-NEXT:    v_and_or_b32 v0, v0, s6, v6
1281; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
1282; GFX9-NEXT:    v_and_or_b32 v1, v1, s6, v8
1283; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
1284; GFX9-NEXT:    v_or3_b32 v0, v0, v7, v4
1285; GFX9-NEXT:    v_or3_b32 v1, v1, v9, v5
1286; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1287; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
1288; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
1289; GFX9-NEXT:    s_setpc_b64 s[30:31]
1290;
1291; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
1292; GFX8:       ; %bb.0:
1293; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
1295; GFX8-NEXT:    v_mov_b32_e32 v3, 8
1296; GFX8-NEXT:    v_mov_b32_e32 v4, 16
1297; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 2, v2
1298; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
1299; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v5
1300; GFX8-NEXT:    s_waitcnt vmcnt(0)
1301; GFX8-NEXT:    v_lshlrev_b32_sdwa v8, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1302; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1303; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 24, v0
1304; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
1305; GFX8-NEXT:    v_lshlrev_b32_sdwa v9, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1306; GFX8-NEXT:    v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1307; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1308; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1309; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
1310; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 24, v7
1311; GFX8-NEXT:    v_or_b32_e32 v0, v0, v9
1312; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
1313; GFX8-NEXT:    v_or_b32_e32 v0, v0, v6
1314; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
1315; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1316; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
1317; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
1318; GFX8-NEXT:    s_setpc_b64 s[30:31]
1319;
1320; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
1321; GFX7:       ; %bb.0:
1322; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1323; GFX7-NEXT:    s_mov_b32 s6, 0
1324; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1325; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1326; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
1327; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 2, v2
1328; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
1329; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
1330; GFX7-NEXT:    s_waitcnt vmcnt(0)
1331; GFX7-NEXT:    v_bfe_u32 v7, v0, 8, 8
1332; GFX7-NEXT:    v_bfe_u32 v9, v1, 8, 8
1333; GFX7-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1334; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
1335; GFX7-NEXT:    v_and_b32_e32 v6, 0xff, v0
1336; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
1337; GFX7-NEXT:    v_and_b32_e32 v8, 0xff, v1
1338; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
1339; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 8, v7
1340; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
1341; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1342; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1343; GFX7-NEXT:    v_or_b32_e32 v6, v6, v7
1344; GFX7-NEXT:    v_or_b32_e32 v7, v8, v9
1345; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
1346; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
1347; GFX7-NEXT:    v_or_b32_e32 v0, v6, v0
1348; GFX7-NEXT:    v_or_b32_e32 v1, v7, v1
1349; GFX7-NEXT:    v_or_b32_e32 v0, v0, v4
1350; GFX7-NEXT:    v_or_b32_e32 v1, v1, v5
1351; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1352; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
1353; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
1354; GFX7-NEXT:    s_setpc_b64 s[30:31]
1355;
1356; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
1357; GFX10:       ; %bb.0:
1358; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1359; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1360; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1361; GFX10-NEXT:    s_mov_b32 s4, 8
1362; GFX10-NEXT:    s_mov_b32 s5, 16
1363; GFX10-NEXT:    s_waitcnt vmcnt(0)
1364; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1365; GFX10-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1366; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
1367; GFX10-NEXT:    v_lshlrev_b32_sdwa v6, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1368; GFX10-NEXT:    v_lshlrev_b32_sdwa v7, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1369; GFX10-NEXT:    v_lshlrev_b32_sdwa v8, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1370; GFX10-NEXT:    v_and_or_b32 v0, v0, 0xff, v4
1371; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1372; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xff, v6
1373; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 24, v5
1374; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 2, v2
1375; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
1376; GFX10-NEXT:    v_or3_b32 v0, v0, v7, v3
1377; GFX10-NEXT:    v_or3_b32 v1, v1, v8, v4
1378; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
1379; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1380; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
1381; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
1382; GFX10-NEXT:    s_setpc_b64 s[30:31]
1383;
1384; GFX11-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
1385; GFX11:       ; %bb.0:
1386; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1387; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1388; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
1389; GFX11-NEXT:    s_waitcnt vmcnt(0)
1390; GFX11-NEXT:    v_bfe_u32 v4, v0, 8, 8
1391; GFX11-NEXT:    v_bfe_u32 v6, v1, 8, 8
1392; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
1393; GFX11-NEXT:    v_bfe_u32 v8, v1, 16, 8
1394; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1395; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
1396; GFX11-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
1397; GFX11-NEXT:    v_bfe_u32 v5, v0, 16, 8
1398; GFX11-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
1399; GFX11-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
1400; GFX11-NEXT:    v_and_or_b32 v0, v0, 0xff, v4
1401; GFX11-NEXT:    v_and_or_b32 v1, v1, 0xff, v6
1402; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 2, v2
1403; GFX11-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1404; GFX11-NEXT:    v_and_b32_e32 v2, 3, v2
1405; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1406; GFX11-NEXT:    v_or3_b32 v1, v1, v8, v7
1407; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
1408; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1409; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1410; GFX11-NEXT:    v_or3_b32 v0, v0, v5, v3
1411; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v1 :: v_dual_lshlrev_b32 v1, 3, v2
1412; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1413; GFX11-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
1414; GFX11-NEXT:    s_setpc_b64 s[30:31]
1415  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
1416  %element = extractelement <8 x i8> %vector, i32 %idx
1417  ret i8 %element
1418}
1419
; Extracts one byte from an <8 x i8> loaded through an `inreg` addrspace(4)
; pointer, using an index that is a plain (non-inreg) i32 argument.
; The expected code repacks both loaded dwords, selects between them by
; comparing (idx >> 2) against 1, shifts the selected dword right by
; (idx & 3) << 3, and copies the result to s0 with v_readfirstlane_b32.
; The check lines below are autogenerated (see the note on the first line of
; this file); regenerate them instead of editing by hand.
1420define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
1421; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
1422; GCN:       ; %bb.0:
1423; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1424; GCN-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
1425; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
1426; GCN-NEXT:    v_and_b32_e32 v0, 3, v0
1427; GCN-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1428; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1429; GCN-NEXT:    s_bfe_u32 s5, s0, 0x80008
1430; GCN-NEXT:    s_lshr_b32 s2, s0, 24
1431; GCN-NEXT:    s_and_b32 s4, s0, 0xff
1432; GCN-NEXT:    s_lshl_b32 s5, s5, 8
1433; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
1434; GCN-NEXT:    s_or_b32 s4, s4, s5
1435; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1436; GCN-NEXT:    s_or_b32 s0, s4, s0
1437; GCN-NEXT:    s_lshl_b32 s2, s2, 24
1438; GCN-NEXT:    s_bfe_u32 s4, s1, 0x80008
1439; GCN-NEXT:    s_lshr_b32 s3, s1, 24
1440; GCN-NEXT:    s_or_b32 s0, s0, s2
1441; GCN-NEXT:    s_and_b32 s2, s1, 0xff
1442; GCN-NEXT:    s_lshl_b32 s4, s4, 8
1443; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
1444; GCN-NEXT:    s_or_b32 s2, s2, s4
1445; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1446; GCN-NEXT:    s_or_b32 s1, s2, s1
1447; GCN-NEXT:    s_lshl_b32 s2, s3, 24
1448; GCN-NEXT:    s_or_b32 s1, s1, s2
1449; GCN-NEXT:    v_mov_b32_e32 v2, s0
1450; GCN-NEXT:    v_mov_b32_e32 v3, s1
1451; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
1452; GCN-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
1453; GCN-NEXT:    v_readfirstlane_b32 s0, v0
1454; GCN-NEXT:    ; return to shader part epilog
1455;
1456; GFX10-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
1457; GFX10:       ; %bb.0:
1458; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1459; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
1460; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
1461; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
1462; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1463; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1464; GFX10-NEXT:    s_bfe_u32 s7, s1, 0x80008
1465; GFX10-NEXT:    s_lshr_b32 s3, s1, 24
1466; GFX10-NEXT:    s_and_b32 s6, s1, 0xff
1467; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x80010
1468; GFX10-NEXT:    s_lshl_b32 s7, s7, 8
1469; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
1470; GFX10-NEXT:    s_or_b32 s6, s6, s7
1471; GFX10-NEXT:    s_bfe_u32 s5, s0, 0x80008
1472; GFX10-NEXT:    s_lshl_b32 s3, s3, 24
1473; GFX10-NEXT:    s_or_b32 s1, s6, s1
1474; GFX10-NEXT:    s_lshr_b32 s2, s0, 24
1475; GFX10-NEXT:    s_and_b32 s4, s0, 0xff
1476; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x80010
1477; GFX10-NEXT:    s_lshl_b32 s5, s5, 8
1478; GFX10-NEXT:    s_or_b32 s1, s1, s3
1479; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
1480; GFX10-NEXT:    s_or_b32 s3, s4, s5
1481; GFX10-NEXT:    v_mov_b32_e32 v2, s1
1482; GFX10-NEXT:    s_lshl_b32 s2, s2, 24
1483; GFX10-NEXT:    s_or_b32 s0, s3, s0
1484; GFX10-NEXT:    s_or_b32 s0, s0, s2
1485; GFX10-NEXT:    v_cndmask_b32_e32 v1, s0, v2, vcc_lo
1486; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
1487; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1488; GFX10-NEXT:    ; return to shader part epilog
1489;
1490; GFX11-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
1491; GFX11:       ; %bb.0:
1492; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1493; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
1494; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1495; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
1496; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1497; GFX11-NEXT:    s_bfe_u32 s7, s1, 0x80008
1498; GFX11-NEXT:    s_lshr_b32 s3, s1, 24
1499; GFX11-NEXT:    s_and_b32 s6, s1, 0xff
1500; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x80010
1501; GFX11-NEXT:    s_lshl_b32 s7, s7, 8
1502; GFX11-NEXT:    s_bfe_u32 s5, s0, 0x80008
1503; GFX11-NEXT:    s_lshl_b32 s1, s1, 16
1504; GFX11-NEXT:    s_or_b32 s6, s6, s7
1505; GFX11-NEXT:    s_lshr_b32 s2, s0, 24
1506; GFX11-NEXT:    s_and_b32 s4, s0, 0xff
1507; GFX11-NEXT:    s_bfe_u32 s0, s0, 0x80010
1508; GFX11-NEXT:    s_lshl_b32 s5, s5, 8
1509; GFX11-NEXT:    s_lshl_b32 s3, s3, 24
1510; GFX11-NEXT:    s_or_b32 s1, s6, s1
1511; GFX11-NEXT:    s_lshl_b32 s0, s0, 16
1512; GFX11-NEXT:    s_or_b32 s1, s1, s3
1513; GFX11-NEXT:    s_or_b32 s3, s4, s5
1514; GFX11-NEXT:    s_lshl_b32 s2, s2, 24
1515; GFX11-NEXT:    s_or_b32 s0, s3, s0
1516; GFX11-NEXT:    v_mov_b32_e32 v2, s1
1517; GFX11-NEXT:    s_or_b32 s0, s0, s2
1518; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1519; GFX11-NEXT:    v_dual_cndmask_b32 v1, s0, v2 :: v_dual_and_b32 v0, 3, v0
1520; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1521; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
1522; GFX11-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
1523; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1524; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1525; GFX11-NEXT:    ; return to shader part epilog
1526  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1527  %element = extractelement <8 x i8> %vector, i32 %idx
1528  ret i8 %element
1529}
1530
; Extracts element 0 (constant index) from an <8 x i8> loaded through an
; `inreg` addrspace(4) pointer.
; The expected code repacks the first loaded dword (s0) from its four bytes
; and returns it in s0; no trailing shift is expected for index 0.
; The check lines below are autogenerated; regenerate them instead of editing
; by hand.
1531define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg %ptr) {
1532; GCN-LABEL: extractelement_sgpr_v8i8_idx0:
1533; GCN:       ; %bb.0:
1534; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1535; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1536; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
1537; GCN-NEXT:    s_lshr_b32 s1, s0, 24
1538; GCN-NEXT:    s_and_b32 s2, s0, 0xff
1539; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
1540; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1541; GCN-NEXT:    s_or_b32 s2, s2, s3
1542; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1543; GCN-NEXT:    s_or_b32 s0, s2, s0
1544; GCN-NEXT:    s_lshl_b32 s1, s1, 24
1545; GCN-NEXT:    s_or_b32 s0, s0, s1
1546; GCN-NEXT:    ; return to shader part epilog
1547;
1548; GFX10-LABEL: extractelement_sgpr_v8i8_idx0:
1549; GFX10:       ; %bb.0:
1550; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1551; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1552; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
1553; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
1554; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
1555; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1556; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1557; GFX10-NEXT:    s_or_b32 s1, s1, s2
1558; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1559; GFX10-NEXT:    s_or_b32 s1, s1, s3
1560; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
1561; GFX10-NEXT:    s_or_b32 s0, s1, s0
1562; GFX10-NEXT:    ; return to shader part epilog
1563;
1564; GFX11-LABEL: extractelement_sgpr_v8i8_idx0:
1565; GFX11:       ; %bb.0:
1566; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1567; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1568; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
1569; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
1570; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
1571; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
1572; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
1573; GFX11-NEXT:    s_or_b32 s1, s1, s2
1574; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
1575; GFX11-NEXT:    s_or_b32 s1, s1, s3
1576; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
1577; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1578; GFX11-NEXT:    s_or_b32 s0, s1, s0
1579; GFX11-NEXT:    ; return to shader part epilog
1580  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1581  %element = extractelement <8 x i8> %vector, i32 0
1582  ret i8 %element
1583}
1584
; Extracts element 1 (constant index) from an <8 x i8> loaded through an
; `inreg` addrspace(4) pointer.
; The expected code repacks the first loaded dword (s0) from its four bytes
; and then shifts it right by 8 to bring byte 1 to the bottom of s0.
; The check lines below are autogenerated; regenerate them instead of editing
; by hand.
1585define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg %ptr) {
1586; GCN-LABEL: extractelement_sgpr_v8i8_idx1:
1587; GCN:       ; %bb.0:
1588; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1589; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1590; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
1591; GCN-NEXT:    s_lshr_b32 s1, s0, 24
1592; GCN-NEXT:    s_and_b32 s2, s0, 0xff
1593; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
1594; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1595; GCN-NEXT:    s_or_b32 s2, s2, s3
1596; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1597; GCN-NEXT:    s_or_b32 s0, s2, s0
1598; GCN-NEXT:    s_lshl_b32 s1, s1, 24
1599; GCN-NEXT:    s_or_b32 s0, s0, s1
1600; GCN-NEXT:    s_lshr_b32 s0, s0, 8
1601; GCN-NEXT:    ; return to shader part epilog
1602;
1603; GFX10-LABEL: extractelement_sgpr_v8i8_idx1:
1604; GFX10:       ; %bb.0:
1605; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1606; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1607; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
1608; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
1609; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
1610; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1611; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1612; GFX10-NEXT:    s_or_b32 s1, s1, s2
1613; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1614; GFX10-NEXT:    s_or_b32 s1, s1, s3
1615; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
1616; GFX10-NEXT:    s_or_b32 s0, s1, s0
1617; GFX10-NEXT:    s_lshr_b32 s0, s0, 8
1618; GFX10-NEXT:    ; return to shader part epilog
1619;
1620; GFX11-LABEL: extractelement_sgpr_v8i8_idx1:
1621; GFX11:       ; %bb.0:
1622; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1623; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1624; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
1625; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
1626; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
1627; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
1628; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
1629; GFX11-NEXT:    s_or_b32 s1, s1, s2
1630; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
1631; GFX11-NEXT:    s_or_b32 s1, s1, s3
1632; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
1633; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1634; GFX11-NEXT:    s_or_b32 s0, s1, s0
1635; GFX11-NEXT:    s_lshr_b32 s0, s0, 8
1636; GFX11-NEXT:    ; return to shader part epilog
1637  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1638  %element = extractelement <8 x i8> %vector, i32 1
1639  ret i8 %element
1640}
1641
; Extracts element 2 (constant index) from an <8 x i8> loaded through an
; `inreg` addrspace(4) pointer.
; The expected code repacks the first loaded dword (s0) from its four bytes
; and then shifts it right by 16 to bring byte 2 to the bottom of s0.
; The check lines below are autogenerated; regenerate them instead of editing
; by hand.
1642define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg %ptr) {
1643; GCN-LABEL: extractelement_sgpr_v8i8_idx2:
1644; GCN:       ; %bb.0:
1645; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1646; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1647; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
1648; GCN-NEXT:    s_lshr_b32 s1, s0, 24
1649; GCN-NEXT:    s_and_b32 s2, s0, 0xff
1650; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
1651; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1652; GCN-NEXT:    s_or_b32 s2, s2, s3
1653; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1654; GCN-NEXT:    s_or_b32 s0, s2, s0
1655; GCN-NEXT:    s_lshl_b32 s1, s1, 24
1656; GCN-NEXT:    s_or_b32 s0, s0, s1
1657; GCN-NEXT:    s_lshr_b32 s0, s0, 16
1658; GCN-NEXT:    ; return to shader part epilog
1659;
1660; GFX10-LABEL: extractelement_sgpr_v8i8_idx2:
1661; GFX10:       ; %bb.0:
1662; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1663; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1664; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
1665; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
1666; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
1667; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1668; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1669; GFX10-NEXT:    s_or_b32 s1, s1, s2
1670; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1671; GFX10-NEXT:    s_or_b32 s1, s1, s3
1672; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
1673; GFX10-NEXT:    s_or_b32 s0, s1, s0
1674; GFX10-NEXT:    s_lshr_b32 s0, s0, 16
1675; GFX10-NEXT:    ; return to shader part epilog
1676;
1677; GFX11-LABEL: extractelement_sgpr_v8i8_idx2:
1678; GFX11:       ; %bb.0:
1679; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1680; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1681; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
1682; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
1683; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
1684; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
1685; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
1686; GFX11-NEXT:    s_or_b32 s1, s1, s2
1687; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
1688; GFX11-NEXT:    s_or_b32 s1, s1, s3
1689; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
1690; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1691; GFX11-NEXT:    s_or_b32 s0, s1, s0
1692; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
1693; GFX11-NEXT:    ; return to shader part epilog
1694  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1695  %element = extractelement <8 x i8> %vector, i32 2
1696  ret i8 %element
1697}
1698
; Extracts element 3 (constant index) from an <8 x i8> loaded through an
; `inreg` addrspace(4) pointer.
; The expected code repacks the first loaded dword (s0) from its four bytes
; and then shifts it right by 24 to bring byte 3 to the bottom of s0.
; The check lines below are autogenerated; regenerate them instead of editing
; by hand.
1699define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg %ptr) {
1700; GCN-LABEL: extractelement_sgpr_v8i8_idx3:
1701; GCN:       ; %bb.0:
1702; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1703; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1704; GCN-NEXT:    s_bfe_u32 s3, s0, 0x80008
1705; GCN-NEXT:    s_lshr_b32 s1, s0, 24
1706; GCN-NEXT:    s_and_b32 s2, s0, 0xff
1707; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
1708; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1709; GCN-NEXT:    s_or_b32 s2, s2, s3
1710; GCN-NEXT:    s_lshl_b32 s0, s0, 16
1711; GCN-NEXT:    s_or_b32 s0, s2, s0
1712; GCN-NEXT:    s_lshl_b32 s1, s1, 24
1713; GCN-NEXT:    s_or_b32 s0, s0, s1
1714; GCN-NEXT:    s_lshr_b32 s0, s0, 24
1715; GCN-NEXT:    ; return to shader part epilog
1716;
1717; GFX10-LABEL: extractelement_sgpr_v8i8_idx3:
1718; GFX10:       ; %bb.0:
1719; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1720; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1721; GFX10-NEXT:    s_bfe_u32 s2, s0, 0x80008
1722; GFX10-NEXT:    s_and_b32 s1, s0, 0xff
1723; GFX10-NEXT:    s_bfe_u32 s3, s0, 0x80010
1724; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1725; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1726; GFX10-NEXT:    s_or_b32 s1, s1, s2
1727; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1728; GFX10-NEXT:    s_or_b32 s1, s1, s3
1729; GFX10-NEXT:    s_lshl_b32 s0, s0, 24
1730; GFX10-NEXT:    s_or_b32 s0, s1, s0
1731; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1732; GFX10-NEXT:    ; return to shader part epilog
1733;
1734; GFX11-LABEL: extractelement_sgpr_v8i8_idx3:
1735; GFX11:       ; %bb.0:
1736; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1737; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1738; GFX11-NEXT:    s_bfe_u32 s2, s0, 0x80008
1739; GFX11-NEXT:    s_and_b32 s1, s0, 0xff
1740; GFX11-NEXT:    s_bfe_u32 s3, s0, 0x80010
1741; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
1742; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
1743; GFX11-NEXT:    s_or_b32 s1, s1, s2
1744; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
1745; GFX11-NEXT:    s_or_b32 s1, s1, s3
1746; GFX11-NEXT:    s_lshl_b32 s0, s0, 24
1747; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1748; GFX11-NEXT:    s_or_b32 s0, s1, s0
1749; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
1750; GFX11-NEXT:    ; return to shader part epilog
1751  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1752  %element = extractelement <8 x i8> %vector, i32 3
1753  ret i8 %element
1754}
1755
; Extracts element 4 (constant index) from an <8 x i8> loaded through an
; `inreg` addrspace(4) pointer.
; The expected code repacks the second loaded dword (s1) from its four bytes
; and leaves the result in s0; no trailing shift is expected for index 4.
; The check lines below are autogenerated; regenerate them instead of editing
; by hand.
1756define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg %ptr) {
1757; GCN-LABEL: extractelement_sgpr_v8i8_idx4:
1758; GCN:       ; %bb.0:
1759; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1760; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1761; GCN-NEXT:    s_bfe_u32 s3, s1, 0x80008
1762; GCN-NEXT:    s_lshr_b32 s0, s1, 24
1763; GCN-NEXT:    s_and_b32 s2, s1, 0xff
1764; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
1765; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1766; GCN-NEXT:    s_or_b32 s2, s2, s3
1767; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1768; GCN-NEXT:    s_or_b32 s1, s2, s1
1769; GCN-NEXT:    s_lshl_b32 s0, s0, 24
1770; GCN-NEXT:    s_or_b32 s0, s1, s0
1771; GCN-NEXT:    ; return to shader part epilog
1772;
1773; GFX10-LABEL: extractelement_sgpr_v8i8_idx4:
1774; GFX10:       ; %bb.0:
1775; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1776; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1777; GFX10-NEXT:    s_bfe_u32 s2, s1, 0x80008
1778; GFX10-NEXT:    s_and_b32 s0, s1, 0xff
1779; GFX10-NEXT:    s_bfe_u32 s3, s1, 0x80010
1780; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1781; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1782; GFX10-NEXT:    s_or_b32 s0, s0, s2
1783; GFX10-NEXT:    s_lshr_b32 s1, s1, 24
1784; GFX10-NEXT:    s_or_b32 s0, s0, s3
1785; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
1786; GFX10-NEXT:    s_or_b32 s0, s0, s1
1787; GFX10-NEXT:    ; return to shader part epilog
1788;
1789; GFX11-LABEL: extractelement_sgpr_v8i8_idx4:
1790; GFX11:       ; %bb.0:
1791; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1792; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1793; GFX11-NEXT:    s_bfe_u32 s2, s1, 0x80008
1794; GFX11-NEXT:    s_and_b32 s0, s1, 0xff
1795; GFX11-NEXT:    s_bfe_u32 s3, s1, 0x80010
1796; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
1797; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
1798; GFX11-NEXT:    s_or_b32 s0, s0, s2
1799; GFX11-NEXT:    s_lshr_b32 s1, s1, 24
1800; GFX11-NEXT:    s_or_b32 s0, s0, s3
1801; GFX11-NEXT:    s_lshl_b32 s1, s1, 24
1802; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1803; GFX11-NEXT:    s_or_b32 s0, s0, s1
1804; GFX11-NEXT:    ; return to shader part epilog
1805  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1806  %element = extractelement <8 x i8> %vector, i32 4
1807  ret i8 %element
1808}
1809
; Extracts element 5 (constant index) from an <8 x i8> loaded through an
; `inreg` addrspace(4) pointer.
; The expected code repacks the second loaded dword (s1) from its four bytes
; into s0 and then shifts s0 right by 8.
; The check lines below are autogenerated; regenerate them instead of editing
; by hand.
1810define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg %ptr) {
1811; GCN-LABEL: extractelement_sgpr_v8i8_idx5:
1812; GCN:       ; %bb.0:
1813; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1814; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1815; GCN-NEXT:    s_bfe_u32 s3, s1, 0x80008
1816; GCN-NEXT:    s_lshr_b32 s0, s1, 24
1817; GCN-NEXT:    s_and_b32 s2, s1, 0xff
1818; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
1819; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1820; GCN-NEXT:    s_or_b32 s2, s2, s3
1821; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1822; GCN-NEXT:    s_or_b32 s1, s2, s1
1823; GCN-NEXT:    s_lshl_b32 s0, s0, 24
1824; GCN-NEXT:    s_or_b32 s0, s1, s0
1825; GCN-NEXT:    s_lshr_b32 s0, s0, 8
1826; GCN-NEXT:    ; return to shader part epilog
1827;
1828; GFX10-LABEL: extractelement_sgpr_v8i8_idx5:
1829; GFX10:       ; %bb.0:
1830; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1831; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1832; GFX10-NEXT:    s_bfe_u32 s2, s1, 0x80008
1833; GFX10-NEXT:    s_and_b32 s0, s1, 0xff
1834; GFX10-NEXT:    s_bfe_u32 s3, s1, 0x80010
1835; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1836; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1837; GFX10-NEXT:    s_or_b32 s0, s0, s2
1838; GFX10-NEXT:    s_lshr_b32 s1, s1, 24
1839; GFX10-NEXT:    s_or_b32 s0, s0, s3
1840; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
1841; GFX10-NEXT:    s_or_b32 s0, s0, s1
1842; GFX10-NEXT:    s_lshr_b32 s0, s0, 8
1843; GFX10-NEXT:    ; return to shader part epilog
1844;
1845; GFX11-LABEL: extractelement_sgpr_v8i8_idx5:
1846; GFX11:       ; %bb.0:
1847; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1848; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1849; GFX11-NEXT:    s_bfe_u32 s2, s1, 0x80008
1850; GFX11-NEXT:    s_and_b32 s0, s1, 0xff
1851; GFX11-NEXT:    s_bfe_u32 s3, s1, 0x80010
1852; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
1853; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
1854; GFX11-NEXT:    s_or_b32 s0, s0, s2
1855; GFX11-NEXT:    s_lshr_b32 s1, s1, 24
1856; GFX11-NEXT:    s_or_b32 s0, s0, s3
1857; GFX11-NEXT:    s_lshl_b32 s1, s1, 24
1858; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1859; GFX11-NEXT:    s_or_b32 s0, s0, s1
1860; GFX11-NEXT:    s_lshr_b32 s0, s0, 8
1861; GFX11-NEXT:    ; return to shader part epilog
1862  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1863  %element = extractelement <8 x i8> %vector, i32 5
1864  ret i8 %element
1865}
1866
; Extracts element 6 (constant index) from an <8 x i8> loaded through an
; `inreg` addrspace(4) pointer.
; The expected code repacks the second loaded dword (s1) from its four bytes
; into s0 and then shifts s0 right by 16.
; The check lines below are autogenerated; regenerate them instead of editing
; by hand.
1867define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg %ptr) {
1868; GCN-LABEL: extractelement_sgpr_v8i8_idx6:
1869; GCN:       ; %bb.0:
1870; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1871; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1872; GCN-NEXT:    s_bfe_u32 s3, s1, 0x80008
1873; GCN-NEXT:    s_lshr_b32 s0, s1, 24
1874; GCN-NEXT:    s_and_b32 s2, s1, 0xff
1875; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
1876; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1877; GCN-NEXT:    s_or_b32 s2, s2, s3
1878; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1879; GCN-NEXT:    s_or_b32 s1, s2, s1
1880; GCN-NEXT:    s_lshl_b32 s0, s0, 24
1881; GCN-NEXT:    s_or_b32 s0, s1, s0
1882; GCN-NEXT:    s_lshr_b32 s0, s0, 16
1883; GCN-NEXT:    ; return to shader part epilog
1884;
1885; GFX10-LABEL: extractelement_sgpr_v8i8_idx6:
1886; GFX10:       ; %bb.0:
1887; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1888; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1889; GFX10-NEXT:    s_bfe_u32 s2, s1, 0x80008
1890; GFX10-NEXT:    s_and_b32 s0, s1, 0xff
1891; GFX10-NEXT:    s_bfe_u32 s3, s1, 0x80010
1892; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1893; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1894; GFX10-NEXT:    s_or_b32 s0, s0, s2
1895; GFX10-NEXT:    s_lshr_b32 s1, s1, 24
1896; GFX10-NEXT:    s_or_b32 s0, s0, s3
1897; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
1898; GFX10-NEXT:    s_or_b32 s0, s0, s1
1899; GFX10-NEXT:    s_lshr_b32 s0, s0, 16
1900; GFX10-NEXT:    ; return to shader part epilog
1901;
1902; GFX11-LABEL: extractelement_sgpr_v8i8_idx6:
1903; GFX11:       ; %bb.0:
1904; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1905; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1906; GFX11-NEXT:    s_bfe_u32 s2, s1, 0x80008
1907; GFX11-NEXT:    s_and_b32 s0, s1, 0xff
1908; GFX11-NEXT:    s_bfe_u32 s3, s1, 0x80010
1909; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
1910; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
1911; GFX11-NEXT:    s_or_b32 s0, s0, s2
1912; GFX11-NEXT:    s_lshr_b32 s1, s1, 24
1913; GFX11-NEXT:    s_or_b32 s0, s0, s3
1914; GFX11-NEXT:    s_lshl_b32 s1, s1, 24
1915; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1916; GFX11-NEXT:    s_or_b32 s0, s0, s1
1917; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
1918; GFX11-NEXT:    ; return to shader part epilog
1919  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1920  %element = extractelement <8 x i8> %vector, i32 6
1921  ret i8 %element
1922}
1923
; Extracts element 7 (constant index) from an <8 x i8> loaded through an
; `inreg` addrspace(4) pointer.
; The expected code repacks the second loaded dword (s1) from its four bytes
; into s0 and then shifts s0 right by 24.
; The check lines below are autogenerated; regenerate them instead of editing
; by hand.
1924define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg %ptr) {
1925; GCN-LABEL: extractelement_sgpr_v8i8_idx7:
1926; GCN:       ; %bb.0:
1927; GCN-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1928; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1929; GCN-NEXT:    s_bfe_u32 s3, s1, 0x80008
1930; GCN-NEXT:    s_lshr_b32 s0, s1, 24
1931; GCN-NEXT:    s_and_b32 s2, s1, 0xff
1932; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
1933; GCN-NEXT:    s_lshl_b32 s3, s3, 8
1934; GCN-NEXT:    s_or_b32 s2, s2, s3
1935; GCN-NEXT:    s_lshl_b32 s1, s1, 16
1936; GCN-NEXT:    s_or_b32 s1, s2, s1
1937; GCN-NEXT:    s_lshl_b32 s0, s0, 24
1938; GCN-NEXT:    s_or_b32 s0, s1, s0
1939; GCN-NEXT:    s_lshr_b32 s0, s0, 24
1940; GCN-NEXT:    ; return to shader part epilog
1941;
1942; GFX10-LABEL: extractelement_sgpr_v8i8_idx7:
1943; GFX10:       ; %bb.0:
1944; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1945; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1946; GFX10-NEXT:    s_bfe_u32 s2, s1, 0x80008
1947; GFX10-NEXT:    s_and_b32 s0, s1, 0xff
1948; GFX10-NEXT:    s_bfe_u32 s3, s1, 0x80010
1949; GFX10-NEXT:    s_lshl_b32 s2, s2, 8
1950; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
1951; GFX10-NEXT:    s_or_b32 s0, s0, s2
1952; GFX10-NEXT:    s_lshr_b32 s1, s1, 24
1953; GFX10-NEXT:    s_or_b32 s0, s0, s3
1954; GFX10-NEXT:    s_lshl_b32 s1, s1, 24
1955; GFX10-NEXT:    s_or_b32 s0, s0, s1
1956; GFX10-NEXT:    s_lshr_b32 s0, s0, 24
1957; GFX10-NEXT:    ; return to shader part epilog
1958;
1959; GFX11-LABEL: extractelement_sgpr_v8i8_idx7:
1960; GFX11:       ; %bb.0:
1961; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1962; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1963; GFX11-NEXT:    s_bfe_u32 s2, s1, 0x80008
1964; GFX11-NEXT:    s_and_b32 s0, s1, 0xff
1965; GFX11-NEXT:    s_bfe_u32 s3, s1, 0x80010
1966; GFX11-NEXT:    s_lshl_b32 s2, s2, 8
1967; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
1968; GFX11-NEXT:    s_or_b32 s0, s0, s2
1969; GFX11-NEXT:    s_lshr_b32 s1, s1, 24
1970; GFX11-NEXT:    s_or_b32 s0, s0, s3
1971; GFX11-NEXT:    s_lshl_b32 s1, s1, 24
1972; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1973; GFX11-NEXT:    s_or_b32 s0, s0, s1
1974; GFX11-NEXT:    s_lshr_b32 s0, s0, 24
1975; GFX11-NEXT:    ; return to shader part epilog
1976  %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr
1977  %element = extractelement <8 x i8> %vector, i32 7
1978  ret i8 %element
1979}
1980
; Extracts element 0 (constant index) from an <8 x i8> loaded through an
; addrspace(1) pointer, in a function with the default calling convention.
; Each of the five target-specific check groups below has its own expected
; sequence. All of them repack v0 (the first loaded dword) from its four bytes
; and return via s_setpc_b64; no trailing shift is expected for index 0.
; The check lines below are autogenerated; regenerate them instead of editing
; by hand.
1981define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) {
1982; GFX9-LABEL: extractelement_vgpr_v8i8_idx0:
1983; GFX9:       ; %bb.0:
1984; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1985; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
1986; GFX9-NEXT:    v_mov_b32_e32 v2, 8
1987; GFX9-NEXT:    s_waitcnt vmcnt(0)
1988; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
1989; GFX9-NEXT:    v_mov_b32_e32 v3, 16
1990; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
1991; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1992; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1993; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
1994; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
1995; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
1996; GFX9-NEXT:    s_setpc_b64 s[30:31]
1997;
1998; GFX8-LABEL: extractelement_vgpr_v8i8_idx0:
1999; GFX8:       ; %bb.0:
2000; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2001; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
2002; GFX8-NEXT:    s_waitcnt vmcnt(0)
2003; GFX8-NEXT:    v_mov_b32_e32 v1, 8
2004; GFX8-NEXT:    v_mov_b32_e32 v2, 16
2005; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2006; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2007; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2008; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2009; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2010; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2011; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2012; GFX8-NEXT:    s_setpc_b64 s[30:31]
2013;
2014; GFX7-LABEL: extractelement_vgpr_v8i8_idx0:
2015; GFX7:       ; %bb.0:
2016; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2017; GFX7-NEXT:    s_mov_b32 s6, 0
2018; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2019; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2020; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
2021; GFX7-NEXT:    s_waitcnt vmcnt(0)
2022; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
2023; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
2024; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
2025; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
2026; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
2027; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
2028; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
2029; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
2030; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
2031; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
2032; GFX7-NEXT:    s_setpc_b64 s[30:31]
2033;
2034; GFX10-LABEL: extractelement_vgpr_v8i8_idx0:
2035; GFX10:       ; %bb.0:
2036; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2037; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2038; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
2039; GFX10-NEXT:    s_waitcnt vmcnt(0)
2040; GFX10-NEXT:    v_mov_b32_e32 v1, 8
2041; GFX10-NEXT:    v_mov_b32_e32 v2, 16
2042; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2043; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2044; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2045; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
2046; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2047; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
2048; GFX10-NEXT:    s_setpc_b64 s[30:31]
2049;
2050; GFX11-LABEL: extractelement_vgpr_v8i8_idx0:
2051; GFX11:       ; %bb.0:
2052; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2053; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2054; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
2055; GFX11-NEXT:    s_waitcnt vmcnt(0)
2056; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
2057; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
2058; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
2059; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2060; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
2061; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2062; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2063; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
2064; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
2065; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2066; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
2067; GFX11-NEXT:    s_setpc_b64 s[30:31]
2068  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
2069  %element = extractelement <8 x i8> %vector, i32 0
2070  ret i8 %element
2071}
2072
; Extract element 1 (constant index) from an <8 x i8> loaded through an
; addrspace(1) pointer. For every target below, the expected code rebuilds the
; low dword (v0) from its four bytes and then shifts it right by 8.
define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v2, 16
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v4
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_mov_b32 s4, 8
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 1
  ret i8 %element
}
2169
; Extract element 2 (constant index) from an <8 x i8> loaded through an
; addrspace(1) pointer. For every target below, the expected code rebuilds the
; low dword (v0) from its four bytes and then shifts it right by 16.
define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    s_mov_b32 s4, 16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    s_mov_b32 s4, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 2
  ret i8 %element
}
2266
; Extract element 3 (constant index) from an <8 x i8> loaded through an
; addrspace(1) pointer. For every target below, the expected code rebuilds the
; low dword (v0) from its four bytes and then shifts it right by 24.
define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v3, 16
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, 8
; GFX10-NEXT:    v_mov_b32_e32 v2, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 3
  ret i8 %element
}
2363
; Extract element 4 (constant index) from an <8 x i8> loaded through an
; addrspace(1) pointer. For every target below, the expected code rebuilds the
; high dword (v1) from its four bytes; no trailing shift is expected because
; the element is that dword's lowest byte.
define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v3, 16
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 8
; GFX10-NEXT:    v_mov_b32_e32 v2, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 4
  ret i8 %element
}
2455
; Extract element 5 (constant index) from an <8 x i8> loaded through an
; addrspace(1) pointer. For every target below, the expected code rebuilds the
; high dword (v1) from its four bytes and then shifts it right by 8.
define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s4, 8
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v2, 16
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v4
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_mov_b32 s4, 8
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v1, 0xff, v1, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v1, v0, v2
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 5
  ret i8 %element
}
2552
; Extract element 6 (constant index) from an <8 x i8> loaded through an
; addrspace(1) pointer. For every target below, the expected code rebuilds the
; high dword (v1) from its four bytes and then shifts it right by 16.
define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    s_mov_b32 s4, 16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 8
; GFX10-NEXT:    s_mov_b32 s4, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 6
  ret i8 %element
}
2649
; Extract element 7 (constant index) from an <8 x i8> loaded through an
; addrspace(1) pointer. For every target below, the expected code rebuilds the
; high dword (v1) from its four bytes and then shifts it right by 24.
define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v2, 8
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v3, 16
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, 8
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, 8
; GFX10-NEXT:    v_mov_b32_e32 v2, 16
; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
  %element = extractelement <8 x i8> %vector, i32 7
  ret i8 %element
}
2746
; Dynamic-index byte extract from a <16 x i8> that is loaded through an
; addrspace(4) pointer. Both the pointer and the i32 index are passed inreg.
; In the checked output below, each of the four loaded dwords is repacked
; byte by byte, the dword is chosen with s_cmp_eq_u32/s_cselect_b32 on
; (idx >> 2), and the result is shifted right by (idx & 3) << 3.
; The check lines are autogenerated; regenerate them instead of hand-editing.
define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) {
; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_bfe_u32 s10, s0, 0x80008
; GCN-NEXT:    s_lshr_b32 s5, s0, 24
; GCN-NEXT:    s_and_b32 s9, s0, 0xff
; GCN-NEXT:    s_lshl_b32 s10, s10, 8
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GCN-NEXT:    s_or_b32 s9, s9, s10
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s0, s9, s0
; GCN-NEXT:    s_lshl_b32 s5, s5, 24
; GCN-NEXT:    s_bfe_u32 s9, s1, 0x80008
; GCN-NEXT:    s_lshr_b32 s6, s1, 24
; GCN-NEXT:    s_or_b32 s0, s0, s5
; GCN-NEXT:    s_and_b32 s5, s1, 0xff
; GCN-NEXT:    s_lshl_b32 s9, s9, 8
; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
; GCN-NEXT:    s_or_b32 s5, s5, s9
; GCN-NEXT:    s_lshl_b32 s1, s1, 16
; GCN-NEXT:    s_or_b32 s1, s5, s1
; GCN-NEXT:    s_lshl_b32 s5, s6, 24
; GCN-NEXT:    s_bfe_u32 s6, s2, 0x80008
; GCN-NEXT:    s_lshr_b32 s7, s2, 24
; GCN-NEXT:    s_or_b32 s1, s1, s5
; GCN-NEXT:    s_and_b32 s5, s2, 0xff
; GCN-NEXT:    s_lshl_b32 s6, s6, 8
; GCN-NEXT:    s_bfe_u32 s2, s2, 0x80010
; GCN-NEXT:    s_or_b32 s5, s5, s6
; GCN-NEXT:    s_lshl_b32 s2, s2, 16
; GCN-NEXT:    s_or_b32 s2, s5, s2
; GCN-NEXT:    s_lshl_b32 s5, s7, 24
; GCN-NEXT:    s_bfe_u32 s6, s3, 0x80008
; GCN-NEXT:    s_lshr_b32 s8, s3, 24
; GCN-NEXT:    s_or_b32 s2, s2, s5
; GCN-NEXT:    s_and_b32 s5, s3, 0xff
; GCN-NEXT:    s_lshl_b32 s6, s6, 8
; GCN-NEXT:    s_bfe_u32 s3, s3, 0x80010
; GCN-NEXT:    s_or_b32 s5, s5, s6
; GCN-NEXT:    s_lshl_b32 s3, s3, 16
; GCN-NEXT:    s_or_b32 s3, s5, s3
; GCN-NEXT:    s_lshl_b32 s5, s8, 24
; GCN-NEXT:    s_or_b32 s3, s3, s5
; GCN-NEXT:    s_lshr_b32 s5, s4, 2
; GCN-NEXT:    s_cmp_eq_u32 s5, 1
; GCN-NEXT:    s_cselect_b32 s0, s1, s0
; GCN-NEXT:    s_cmp_eq_u32 s5, 2
; GCN-NEXT:    s_cselect_b32 s0, s2, s0
; GCN-NEXT:    s_cmp_eq_u32 s5, 3
; GCN-NEXT:    s_cselect_b32 s0, s3, s0
; GCN-NEXT:    s_and_b32 s1, s4, 3
; GCN-NEXT:    s_lshl_b32 s1, s1, 3
; GCN-NEXT:    s_lshr_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_u32 s10, s0, 0x80008
; GFX10-NEXT:    s_bfe_u32 s12, s1, 0x80008
; GFX10-NEXT:    s_lshr_b32 s6, s1, 24
; GFX10-NEXT:    s_and_b32 s9, s0, 0xff
; GFX10-NEXT:    s_and_b32 s11, s1, 0xff
; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x80010
; GFX10-NEXT:    s_lshl_b32 s10, s10, 8
; GFX10-NEXT:    s_lshl_b32 s12, s12, 8
; GFX10-NEXT:    s_lshr_b32 s5, s0, 24
; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
; GFX10-NEXT:    s_or_b32 s9, s9, s10
; GFX10-NEXT:    s_or_b32 s10, s11, s12
; GFX10-NEXT:    s_bfe_u32 s14, s2, 0x80008
; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
; GFX10-NEXT:    s_lshl_b32 s6, s6, 24
; GFX10-NEXT:    s_or_b32 s1, s10, s1
; GFX10-NEXT:    s_lshr_b32 s7, s2, 24
; GFX10-NEXT:    s_and_b32 s13, s2, 0xff
; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x80010
; GFX10-NEXT:    s_lshl_b32 s5, s5, 24
; GFX10-NEXT:    s_lshl_b32 s14, s14, 8
; GFX10-NEXT:    s_or_b32 s0, s9, s0
; GFX10-NEXT:    s_or_b32 s1, s1, s6
; GFX10-NEXT:    s_bfe_u32 s6, s3, 0x80008
; GFX10-NEXT:    s_lshr_b32 s8, s3, 24
; GFX10-NEXT:    s_lshl_b32 s2, s2, 16
; GFX10-NEXT:    s_or_b32 s11, s13, s14
; GFX10-NEXT:    s_or_b32 s0, s0, s5
; GFX10-NEXT:    s_lshl_b32 s5, s7, 24
; GFX10-NEXT:    s_and_b32 s7, s3, 0xff
; GFX10-NEXT:    s_lshl_b32 s6, s6, 8
; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x80010
; GFX10-NEXT:    s_or_b32 s2, s11, s2
; GFX10-NEXT:    s_or_b32 s6, s7, s6
; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
; GFX10-NEXT:    s_or_b32 s2, s2, s5
; GFX10-NEXT:    s_or_b32 s3, s6, s3
; GFX10-NEXT:    s_lshl_b32 s5, s8, 24
; GFX10-NEXT:    s_lshr_b32 s6, s4, 2
; GFX10-NEXT:    s_or_b32 s3, s3, s5
; GFX10-NEXT:    s_cmp_eq_u32 s6, 1
; GFX10-NEXT:    s_cselect_b32 s0, s1, s0
; GFX10-NEXT:    s_cmp_eq_u32 s6, 2
; GFX10-NEXT:    s_cselect_b32 s0, s2, s0
; GFX10-NEXT:    s_cmp_eq_u32 s6, 3
; GFX10-NEXT:    s_cselect_b32 s0, s3, s0
; GFX10-NEXT:    s_and_b32 s1, s4, 3
; GFX10-NEXT:    s_lshl_b32 s1, s1, 3
; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_bfe_u32 s10, s0, 0x80008
; GFX11-NEXT:    s_bfe_u32 s12, s1, 0x80008
; GFX11-NEXT:    s_lshr_b32 s6, s1, 24
; GFX11-NEXT:    s_and_b32 s9, s0, 0xff
; GFX11-NEXT:    s_and_b32 s11, s1, 0xff
; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x80010
; GFX11-NEXT:    s_lshl_b32 s10, s10, 8
; GFX11-NEXT:    s_lshl_b32 s12, s12, 8
; GFX11-NEXT:    s_lshr_b32 s5, s0, 24
; GFX11-NEXT:    s_bfe_u32 s0, s0, 0x80010
; GFX11-NEXT:    s_lshl_b32 s1, s1, 16
; GFX11-NEXT:    s_or_b32 s9, s9, s10
; GFX11-NEXT:    s_or_b32 s10, s11, s12
; GFX11-NEXT:    s_bfe_u32 s14, s2, 0x80008
; GFX11-NEXT:    s_lshl_b32 s0, s0, 16
; GFX11-NEXT:    s_lshl_b32 s6, s6, 24
; GFX11-NEXT:    s_or_b32 s1, s10, s1
; GFX11-NEXT:    s_lshr_b32 s7, s2, 24
; GFX11-NEXT:    s_and_b32 s13, s2, 0xff
; GFX11-NEXT:    s_bfe_u32 s2, s2, 0x80010
; GFX11-NEXT:    s_lshl_b32 s5, s5, 24
; GFX11-NEXT:    s_lshl_b32 s14, s14, 8
; GFX11-NEXT:    s_or_b32 s0, s9, s0
; GFX11-NEXT:    s_or_b32 s1, s1, s6
; GFX11-NEXT:    s_bfe_u32 s6, s3, 0x80008
; GFX11-NEXT:    s_lshr_b32 s8, s3, 24
; GFX11-NEXT:    s_lshl_b32 s2, s2, 16
; GFX11-NEXT:    s_or_b32 s11, s13, s14
; GFX11-NEXT:    s_or_b32 s0, s0, s5
; GFX11-NEXT:    s_lshl_b32 s5, s7, 24
; GFX11-NEXT:    s_and_b32 s7, s3, 0xff
; GFX11-NEXT:    s_lshl_b32 s6, s6, 8
; GFX11-NEXT:    s_bfe_u32 s3, s3, 0x80010
; GFX11-NEXT:    s_or_b32 s2, s11, s2
; GFX11-NEXT:    s_or_b32 s6, s7, s6
; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
; GFX11-NEXT:    s_or_b32 s2, s2, s5
; GFX11-NEXT:    s_or_b32 s3, s6, s3
; GFX11-NEXT:    s_lshl_b32 s5, s8, 24
; GFX11-NEXT:    s_lshr_b32 s6, s4, 2
; GFX11-NEXT:    s_or_b32 s3, s3, s5
; GFX11-NEXT:    s_cmp_eq_u32 s6, 1
; GFX11-NEXT:    s_cselect_b32 s0, s1, s0
; GFX11-NEXT:    s_cmp_eq_u32 s6, 2
; GFX11-NEXT:    s_cselect_b32 s0, s2, s0
; GFX11-NEXT:    s_cmp_eq_u32 s6, 3
; GFX11-NEXT:    s_cselect_b32 s0, s3, s0
; GFX11-NEXT:    s_and_b32 s1, s4, 3
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_lshl_b32 s1, s1, 3
; GFX11-NEXT:    s_lshr_b32 s0, s0, s1
; GFX11-NEXT:    ; return to shader part epilog
  %vec = load <16 x i8>, <16 x i8> addrspace(4)* %ptr
  %byte = extractelement <16 x i8> %vec, i32 %idx
  ret i8 %byte
}
2920
; Dynamic-index byte extract from a <16 x i8> that is loaded through an
; addrspace(1) pointer (not inreg); only the i32 index is passed inreg.
; In the checked output below, the dword is chosen with
; v_cmp_eq_u32/v_cndmask_b32 on (idx >> 2), shifted right by (idx & 3) << 3,
; and the result is copied to s0 with v_readfirstlane_b32.
; The check lines are autogenerated; regenerate them instead of hand-editing.
define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT:    s_mov_b32 s0, 8
; GFX9-NEXT:    s_mov_b32 s1, 16
; GFX9-NEXT:    s_movk_i32 s3, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v5, 8
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v6, 16
; GFX9-NEXT:    s_lshr_b32 s4, s2, 2
; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s4, 1
; GFX9-NEXT:    s_and_b32 s2, s2, 3
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v0
; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 24, v1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v11, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshlrev_b32_sdwa v13, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 24, v2
; GFX9-NEXT:    v_lshlrev_b32_sdwa v12, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_lshlrev_b32_sdwa v14, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_lshlrev_b32_sdwa v15, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_and_or_b32 v0, v0, s3, v11
; GFX9-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
; GFX9-NEXT:    v_and_or_b32 v1, v1, s3, v13
; GFX9-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 24, v3
; GFX9-NEXT:    v_lshlrev_b32_sdwa v16, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_lshlrev_b32_sdwa v5, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v15
; GFX9-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
; GFX9-NEXT:    v_or3_b32 v0, v0, v12, v7
; GFX9-NEXT:    v_or3_b32 v1, v1, v14, v8
; GFX9-NEXT:    v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX9-NEXT:    v_and_or_b32 v3, v3, v4, v5
; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 24, v10
; GFX9-NEXT:    v_or3_b32 v2, v2, v16, v9
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s4, 2
; GFX9-NEXT:    v_or3_b32 v3, v3, v6, v4
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e64 vcc, s4, 3
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    s_lshl_b32 s0, s2, 3
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GFX8-NEXT:    v_mov_b32_e32 v4, 8
; GFX8-NEXT:    v_mov_b32_e32 v5, 16
; GFX8-NEXT:    v_mov_b32_e32 v6, 8
; GFX8-NEXT:    v_mov_b32_e32 v7, 16
; GFX8-NEXT:    s_lshr_b32 s0, s2, 2
; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
; GFX8-NEXT:    s_and_b32 s1, s2, 3
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_sdwa v12, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 24, v0
; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 24, v1
; GFX8-NEXT:    v_lshlrev_b32_sdwa v13, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_lshlrev_b32_sdwa v14, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 24, v2
; GFX8-NEXT:    v_lshlrev_b32_sdwa v15, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 24, v9
; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v13
; GFX8-NEXT:    v_or_b32_e32 v1, v1, v5
; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 24, v3
; GFX8-NEXT:    v_lshlrev_b32_sdwa v7, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX8-NEXT:    v_lshlrev_b32_e32 v9, 24, v10
; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v2, v2, v15
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v8
; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 24, v11
; GFX8-NEXT:    v_or_b32_e32 v3, v3, v7
; GFX8-NEXT:    v_or_b32_e32 v2, v2, v9
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 2
; GFX8-NEXT:    v_or_b32_e32 v3, v3, v6
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 3
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX8-NEXT:    s_lshl_b32 s0, s1, 3
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_lshr_b32 s0, s2, 2
; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 1
; GFX7-NEXT:    s_and_b32 s1, s2, 3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_bfe_u32 v9, v0, 8, 8
; GFX7-NEXT:    v_bfe_u32 v11, v1, 8, 8
; GFX7-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
; GFX7-NEXT:    v_and_b32_e32 v8, 0xff, v0
; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
; GFX7-NEXT:    v_and_b32_e32 v10, 0xff, v1
; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
; GFX7-NEXT:    v_bfe_u32 v13, v2, 8, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
; GFX7-NEXT:    v_lshlrev_b32_e32 v11, 8, v11
; GFX7-NEXT:    v_lshrrev_b32_e32 v6, 24, v2
; GFX7-NEXT:    v_and_b32_e32 v12, 0xff, v2
; GFX7-NEXT:    v_bfe_u32 v2, v2, 16, 8
; GFX7-NEXT:    v_bfe_u32 v15, v3, 8, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v13, 8, v13
; GFX7-NEXT:    v_or_b32_e32 v8, v8, v9
; GFX7-NEXT:    v_or_b32_e32 v9, v10, v11
; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 24, v3
; GFX7-NEXT:    v_and_b32_e32 v14, 0xff, v3
; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v15, 8, v15
; GFX7-NEXT:    v_or_b32_e32 v10, v12, v13
; GFX7-NEXT:    v_or_b32_e32 v0, v8, v0
; GFX7-NEXT:    v_or_b32_e32 v1, v9, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT:    v_or_b32_e32 v11, v14, v15
; GFX7-NEXT:    v_or_b32_e32 v2, v10, v2
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v5
; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
; GFX7-NEXT:    v_or_b32_e32 v3, v11, v3
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v6
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 2
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v7
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX7-NEXT:    v_cmp_eq_u32_e64 vcc, s0, 3
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX7-NEXT:    s_lshl_b32 s0, s1, 3
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT:    s_mov_b32 s0, 8
; GFX10-NEXT:    v_mov_b32_e32 v4, 8
; GFX10-NEXT:    s_mov_b32 s1, 16
; GFX10-NEXT:    v_mov_b32_e32 v5, 16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 24, v0
; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; GFX10-NEXT:    v_lshlrev_b32_sdwa v9, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshlrev_b32_sdwa v11, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 24, v2
; GFX10-NEXT:    v_lshlrev_b32_sdwa v10, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_lshlrev_b32_sdwa v12, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_lshlrev_b32_sdwa v13, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_and_or_b32 v0, v0, 0xff, v9
; GFX10-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xff, v11
; GFX10-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
; GFX10-NEXT:    s_lshr_b32 s0, s2, 2
; GFX10-NEXT:    v_lshlrev_b32_sdwa v14, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_and_or_b32 v2, 0xff, v2, v13
; GFX10-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
; GFX10-NEXT:    v_lshlrev_b32_sdwa v4, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 24, v3
; GFX10-NEXT:    v_or3_b32 v0, v0, v10, v6
; GFX10-NEXT:    v_or3_b32 v1, v1, v12, v7
; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 1
; GFX10-NEXT:    v_or3_b32 v2, v2, v14, v8
; GFX10-NEXT:    v_and_or_b32 v4, 0xff, v3, v4
; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 24, v9
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 2
; GFX10-NEXT:    v_or3_b32 v1, v4, v3, v5
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 3
; GFX10-NEXT:    s_and_b32 s0, s2, 3
; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
; GFX11-NEXT:    s_lshr_b32 s0, s2, 2
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_u32 v12, v2, 8, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 24, v2
; GFX11-NEXT:    v_bfe_u32 v8, v0, 8, 8
; GFX11-NEXT:    v_bfe_u32 v13, v2, 16, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GFX11-NEXT:    v_lshlrev_b32_e32 v12, 8, v12
; GFX11-NEXT:    v_bfe_u32 v9, v0, 16, 8
; GFX11-NEXT:    v_bfe_u32 v10, v1, 8, 8
; GFX11-NEXT:    v_lshlrev_b32_e32 v8, 8, v8
; GFX11-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
; GFX11-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
; GFX11-NEXT:    v_and_or_b32 v2, 0xff, v2, v12
; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 24, v1
; GFX11-NEXT:    v_bfe_u32 v11, v1, 16, 8
; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
; GFX11-NEXT:    v_lshlrev_b32_e32 v10, 8, v10
; GFX11-NEXT:    v_and_or_b32 v0, v0, 0xff, v8
; GFX11-NEXT:    v_or3_b32 v2, v2, v13, v6
; GFX11-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
; GFX11-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
; GFX11-NEXT:    v_and_or_b32 v1, v1, 0xff, v10
; GFX11-NEXT:    v_bfe_u32 v14, v3, 8, 8
; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 24, v3
; GFX11-NEXT:    v_or3_b32 v0, v0, v9, v4
; GFX11-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
; GFX11-NEXT:    v_bfe_u32 v10, v3, 16, 8
; GFX11-NEXT:    v_lshlrev_b32_e32 v8, 8, v14
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_or3_b32 v1, v1, v11, v5
; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 16, v10
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT:    v_and_or_b32 v3, 0xff, v3, v8
; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s0, 3
; GFX11-NEXT:    v_lshlrev_b32_e32 v5, 24, v7
; GFX11-NEXT:    s_and_b32 s0, s2, 3
; GFX11-NEXT:    s_lshl_b32 s0, s0, 3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_or3_b32 v1, v3, v4, v5
; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vec = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  %byte = extractelement <16 x i8> %vec, i32 %idx
  ret i8 %byte
}
3181
3182define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 %idx) {
3183; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
3184; GFX9:       ; %bb.0:
3185; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3186; GFX9-NEXT:    global_load_dwordx4 v[3:6], v[0:1], off
3187; GFX9-NEXT:    s_mov_b32 s4, 8
3188; GFX9-NEXT:    s_mov_b32 s5, 16
3189; GFX9-NEXT:    s_movk_i32 s6, 0xff
3190; GFX9-NEXT:    v_mov_b32_e32 v1, 8
3191; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
3192; GFX9-NEXT:    v_mov_b32_e32 v7, 16
3193; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 2, v2
3194; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
3195; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
3196; GFX9-NEXT:    s_waitcnt vmcnt(0)
3197; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 24, v3
3198; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 24, v4
3199; GFX9-NEXT:    v_lshlrev_b32_sdwa v13, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3200; GFX9-NEXT:    v_lshlrev_b32_sdwa v15, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3201; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 24, v5
3202; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 24, v6
3203; GFX9-NEXT:    v_lshlrev_b32_sdwa v14, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3204; GFX9-NEXT:    v_lshlrev_b32_sdwa v16, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3205; GFX9-NEXT:    v_lshlrev_b32_sdwa v17, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3206; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3207; GFX9-NEXT:    v_and_or_b32 v3, v3, s6, v13
3208; GFX9-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
3209; GFX9-NEXT:    v_and_or_b32 v4, v4, s6, v15
3210; GFX9-NEXT:    v_lshlrev_b32_e32 v10, 24, v10
3211; GFX9-NEXT:    v_lshlrev_b32_sdwa v18, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3212; GFX9-NEXT:    v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3213; GFX9-NEXT:    v_and_or_b32 v5, v5, v0, v17
3214; GFX9-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
3215; GFX9-NEXT:    v_and_or_b32 v0, v6, v0, v1
3216; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v12
3217; GFX9-NEXT:    v_or3_b32 v3, v3, v14, v9
3218; GFX9-NEXT:    v_or3_b32 v4, v4, v16, v10
3219; GFX9-NEXT:    v_or3_b32 v5, v5, v18, v11
3220; GFX9-NEXT:    v_or3_b32 v0, v0, v7, v1
3221; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
3222; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
3223; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
3224; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
3225; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3226; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
3227; GFX9-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
3228; GFX9-NEXT:    s_setpc_b64 s[30:31]
3229;
3230; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
3231; GFX8:       ; %bb.0:
3232; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3233; GFX8-NEXT:    flat_load_dwordx4 v[3:6], v[0:1]
3234; GFX8-NEXT:    v_mov_b32_e32 v0, 8
3235; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3236; GFX8-NEXT:    v_mov_b32_e32 v7, 8
3237; GFX8-NEXT:    v_mov_b32_e32 v8, 16
3238; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 2, v2
3239; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v9
3240; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
3241; GFX8-NEXT:    s_waitcnt vmcnt(0)
3242; GFX8-NEXT:    v_lshlrev_b32_sdwa v14, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3243; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3244; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 24, v3
3245; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 24, v4
3246; GFX8-NEXT:    v_lshlrev_b32_sdwa v15, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3247; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3248; GFX8-NEXT:    v_lshlrev_b32_sdwa v16, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3249; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3250; GFX8-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3251; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 24, v5
3252; GFX8-NEXT:    v_lshlrev_b32_sdwa v17, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3253; GFX8-NEXT:    v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3254; GFX8-NEXT:    v_lshlrev_b32_e32 v10, 24, v10
3255; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 24, v11
3256; GFX8-NEXT:    v_or_b32_sdwa v5, v5, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3257; GFX8-NEXT:    v_or_b32_e32 v3, v3, v15
3258; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3259; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 24, v6
3260; GFX8-NEXT:    v_lshlrev_b32_sdwa v8, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3261; GFX8-NEXT:    v_lshlrev_b32_e32 v11, 24, v12
3262; GFX8-NEXT:    v_or_b32_sdwa v6, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3263; GFX8-NEXT:    v_or_b32_e32 v1, v5, v17
3264; GFX8-NEXT:    v_or_b32_e32 v3, v3, v10
3265; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
3266; GFX8-NEXT:    v_lshlrev_b32_e32 v7, 24, v13
3267; GFX8-NEXT:    v_or_b32_e32 v5, v6, v8
3268; GFX8-NEXT:    v_or_b32_e32 v1, v1, v11
3269; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
3270; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v9
3271; GFX8-NEXT:    v_or_b32_e32 v4, v5, v7
3272; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3273; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
3274; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3275; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
3276; GFX8-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
3277; GFX8-NEXT:    s_setpc_b64 s[30:31]
3278;
3279; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
3280; GFX7:       ; %bb.0:
3281; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3282; GFX7-NEXT:    s_mov_b32 s6, 0
3283; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3284; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3285; GFX7-NEXT:    buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64
3286; GFX7-NEXT:    v_lshrrev_b32_e32 v17, 2, v2
3287; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v17
3288; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
3289; GFX7-NEXT:    s_waitcnt vmcnt(0)
3290; GFX7-NEXT:    v_bfe_u32 v10, v3, 8, 8
3291; GFX7-NEXT:    v_bfe_u32 v12, v4, 8, 8
3292; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v3
3293; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v4
3294; GFX7-NEXT:    v_and_b32_e32 v9, 0xff, v3
3295; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
3296; GFX7-NEXT:    v_and_b32_e32 v11, 0xff, v4
3297; GFX7-NEXT:    v_bfe_u32 v4, v4, 16, 8
3298; GFX7-NEXT:    v_bfe_u32 v14, v5, 8, 8
3299; GFX7-NEXT:    v_lshlrev_b32_e32 v10, 8, v10
3300; GFX7-NEXT:    v_lshlrev_b32_e32 v12, 8, v12
3301; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 24, v5
3302; GFX7-NEXT:    v_and_b32_e32 v13, 0xff, v5
3303; GFX7-NEXT:    v_bfe_u32 v5, v5, 16, 8
3304; GFX7-NEXT:    v_bfe_u32 v16, v6, 8, 8
3305; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
3306; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
3307; GFX7-NEXT:    v_lshlrev_b32_e32 v14, 8, v14
3308; GFX7-NEXT:    v_or_b32_e32 v9, v9, v10
3309; GFX7-NEXT:    v_or_b32_e32 v10, v11, v12
3310; GFX7-NEXT:    v_lshrrev_b32_e32 v8, 24, v6
3311; GFX7-NEXT:    v_and_b32_e32 v15, 0xff, v6
3312; GFX7-NEXT:    v_bfe_u32 v6, v6, 16, 8
3313; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
3314; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3315; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
3316; GFX7-NEXT:    v_lshlrev_b32_e32 v16, 8, v16
3317; GFX7-NEXT:    v_or_b32_e32 v11, v13, v14
3318; GFX7-NEXT:    v_or_b32_e32 v3, v9, v3
3319; GFX7-NEXT:    v_or_b32_e32 v4, v10, v4
3320; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
3321; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
3322; GFX7-NEXT:    v_or_b32_e32 v12, v15, v16
3323; GFX7-NEXT:    v_or_b32_e32 v5, v11, v5
3324; GFX7-NEXT:    v_or_b32_e32 v0, v3, v0
3325; GFX7-NEXT:    v_or_b32_e32 v1, v4, v1
3326; GFX7-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
3327; GFX7-NEXT:    v_or_b32_e32 v6, v12, v6
3328; GFX7-NEXT:    v_or_b32_e32 v3, v5, v7
3329; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3330; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v17
3331; GFX7-NEXT:    v_or_b32_e32 v4, v6, v8
3332; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3333; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v17
3334; GFX7-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3335; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
3336; GFX7-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
3337; GFX7-NEXT:    s_setpc_b64 s[30:31]
3338;
3339; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
3340; GFX10:       ; %bb.0:
3341; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3342; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3343; GFX10-NEXT:    global_load_dwordx4 v[3:6], v[0:1], off
3344; GFX10-NEXT:    s_mov_b32 s4, 8
3345; GFX10-NEXT:    v_mov_b32_e32 v0, 8
3346; GFX10-NEXT:    s_mov_b32 s5, 16
3347; GFX10-NEXT:    v_mov_b32_e32 v1, 16
3348; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 2, v2
3349; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
3350; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
3351; GFX10-NEXT:    s_waitcnt vmcnt(0)
3352; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 24, v3
3353; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 24, v4
3354; GFX10-NEXT:    v_lshlrev_b32_sdwa v12, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3355; GFX10-NEXT:    v_lshlrev_b32_sdwa v14, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3356; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 24, v5
3357; GFX10-NEXT:    v_lshlrev_b32_sdwa v13, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3358; GFX10-NEXT:    v_lshlrev_b32_sdwa v15, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3359; GFX10-NEXT:    v_lshlrev_b32_sdwa v16, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3360; GFX10-NEXT:    v_and_or_b32 v3, v3, 0xff, v12
3361; GFX10-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
3362; GFX10-NEXT:    v_and_or_b32 v4, v4, 0xff, v14
3363; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
3364; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 24, v6
3365; GFX10-NEXT:    v_lshlrev_b32_sdwa v17, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3366; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3367; GFX10-NEXT:    v_and_or_b32 v5, 0xff, v5, v16
3368; GFX10-NEXT:    v_lshlrev_b32_e32 v10, 24, v10
3369; GFX10-NEXT:    v_or3_b32 v3, v3, v13, v8
3370; GFX10-NEXT:    v_or3_b32 v4, v4, v15, v9
3371; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3372; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v6, v0
3373; GFX10-NEXT:    v_lshlrev_b32_e32 v6, 24, v11
3374; GFX10-NEXT:    v_or3_b32 v5, v5, v17, v10
3375; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc_lo
3376; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v7
3377; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v6
3378; GFX10-NEXT:    v_cndmask_b32_e32 v1, v3, v5, vcc_lo
3379; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v7
3380; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
3381; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 3, v2
3382; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
3383; GFX10-NEXT:    s_setpc_b64 s[30:31]
3384;
3385; GFX11-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
3386; GFX11:       ; %bb.0:
3387; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3388; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3389; GFX11-NEXT:    global_load_b128 v[3:6], v[0:1], off
3390; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 2, v2
3391; GFX11-NEXT:    v_and_b32_e32 v2, 3, v2
3392; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3393; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3394; GFX11-NEXT:    s_waitcnt vmcnt(0)
3395; GFX11-NEXT:    v_bfe_u32 v14, v5, 8, 8
3396; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 24, v5
3397; GFX11-NEXT:    v_bfe_u32 v15, v5, 16, 8
3398; GFX11-NEXT:    v_bfe_u32 v10, v3, 8, 8
3399; GFX11-NEXT:    v_bfe_u32 v12, v4, 8, 8
3400; GFX11-NEXT:    v_lshlrev_b32_e32 v14, 8, v14
3401; GFX11-NEXT:    v_lshlrev_b32_e32 v8, 24, v8
3402; GFX11-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
3403; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
3404; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 24, v4
3405; GFX11-NEXT:    v_and_or_b32 v5, 0xff, v5, v14
3406; GFX11-NEXT:    v_bfe_u32 v11, v3, 16, 8
3407; GFX11-NEXT:    v_bfe_u32 v13, v4, 16, 8
3408; GFX11-NEXT:    v_lshlrev_b32_e32 v10, 8, v10
3409; GFX11-NEXT:    v_bfe_u32 v16, v6, 8, 8
3410; GFX11-NEXT:    v_or3_b32 v5, v5, v15, v8
3411; GFX11-NEXT:    v_lshlrev_b32_e32 v12, 8, v12
3412; GFX11-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
3413; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3414; GFX11-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
3415; GFX11-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
3416; GFX11-NEXT:    v_and_or_b32 v3, v3, 0xff, v10
3417; GFX11-NEXT:    v_and_or_b32 v4, v4, 0xff, v12
3418; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 24, v6
3419; GFX11-NEXT:    v_bfe_u32 v17, v6, 16, 8
3420; GFX11-NEXT:    v_lshlrev_b32_e32 v10, 8, v16
3421; GFX11-NEXT:    v_or3_b32 v1, v3, v11, v1
3422; GFX11-NEXT:    v_or3_b32 v3, v4, v13, v7
3423; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 24, v9
3424; GFX11-NEXT:    v_lshlrev_b32_e32 v12, 16, v17
3425; GFX11-NEXT:    v_and_or_b32 v6, 0xff, v6, v10
3426; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
3427; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
3428; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
3429; GFX11-NEXT:    v_or3_b32 v3, v6, v12, v4
3430; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3431; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
3432; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
3433; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v3 :: v_dual_lshlrev_b32 v1, 3, v2
3434; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3435; GFX11-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
3436; GFX11-NEXT:    s_setpc_b64 s[30:31]
3437  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3438  %element = extractelement <16 x i8> %vector, i32 %idx
3439  ret i8 %element
3440}
3441
; Dynamic-index byte extract from a 16 x i8 vector loaded through an inreg
; addrspace(4) pointer, while the index %idx is not marked inreg.
; The checked code fetches all four dwords with one scalar load, rebuilds each
; dword from its bytes with scalar ops, picks a dword by comparing idx >> 2
; against 1, 2 and 3, and shifts the pick right by (idx & 3) << 3 bits.
; The result is moved into s0 with v_readfirstlane_b32 before returning.
3442define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 %idx) {
3443; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
3444; GCN:       ; %bb.0:
3445; GCN-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
3446; GCN-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
3447; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3448; GCN-NEXT:    v_and_b32_e32 v0, 3, v0
3449; GCN-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
3450; GCN-NEXT:    s_waitcnt lgkmcnt(0)
3451; GCN-NEXT:    s_bfe_u32 s9, s0, 0x80008
3452; GCN-NEXT:    s_lshr_b32 s4, s0, 24
3453; GCN-NEXT:    s_and_b32 s8, s0, 0xff
3454; GCN-NEXT:    s_lshl_b32 s9, s9, 8
3455; GCN-NEXT:    s_bfe_u32 s0, s0, 0x80010
3456; GCN-NEXT:    s_or_b32 s8, s8, s9
3457; GCN-NEXT:    s_lshl_b32 s0, s0, 16
3458; GCN-NEXT:    s_or_b32 s0, s8, s0
3459; GCN-NEXT:    s_lshl_b32 s4, s4, 24
3460; GCN-NEXT:    s_bfe_u32 s8, s1, 0x80008
3461; GCN-NEXT:    s_lshr_b32 s5, s1, 24
3462; GCN-NEXT:    s_or_b32 s0, s0, s4
3463; GCN-NEXT:    s_and_b32 s4, s1, 0xff
3464; GCN-NEXT:    s_lshl_b32 s8, s8, 8
3465; GCN-NEXT:    s_bfe_u32 s1, s1, 0x80010
3466; GCN-NEXT:    s_or_b32 s4, s4, s8
3467; GCN-NEXT:    s_lshl_b32 s1, s1, 16
3468; GCN-NEXT:    s_or_b32 s1, s4, s1
3469; GCN-NEXT:    s_lshl_b32 s4, s5, 24
3470; GCN-NEXT:    s_bfe_u32 s5, s2, 0x80008
3471; GCN-NEXT:    s_lshr_b32 s6, s2, 24
3472; GCN-NEXT:    s_or_b32 s1, s1, s4
3473; GCN-NEXT:    s_and_b32 s4, s2, 0xff
3474; GCN-NEXT:    s_lshl_b32 s5, s5, 8
3475; GCN-NEXT:    s_bfe_u32 s2, s2, 0x80010
3476; GCN-NEXT:    s_or_b32 s4, s4, s5
3477; GCN-NEXT:    s_lshl_b32 s2, s2, 16
3478; GCN-NEXT:    s_or_b32 s2, s4, s2
3479; GCN-NEXT:    s_lshl_b32 s4, s6, 24
3480; GCN-NEXT:    s_bfe_u32 s5, s3, 0x80008
3481; GCN-NEXT:    s_lshr_b32 s7, s3, 24
3482; GCN-NEXT:    s_or_b32 s2, s2, s4
3483; GCN-NEXT:    s_and_b32 s4, s3, 0xff
3484; GCN-NEXT:    s_lshl_b32 s5, s5, 8
3485; GCN-NEXT:    s_bfe_u32 s3, s3, 0x80010
3486; GCN-NEXT:    s_or_b32 s4, s4, s5
3487; GCN-NEXT:    s_lshl_b32 s3, s3, 16
3488; GCN-NEXT:    s_or_b32 s3, s4, s3
3489; GCN-NEXT:    s_lshl_b32 s4, s7, 24
3490; GCN-NEXT:    v_mov_b32_e32 v2, s0
3491; GCN-NEXT:    v_mov_b32_e32 v3, s1
3492; GCN-NEXT:    s_or_b32 s3, s3, s4
3493; GCN-NEXT:    v_mov_b32_e32 v4, s2
3494; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3495; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
3496; GCN-NEXT:    v_mov_b32_e32 v5, s3
3497; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
3498; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
3499; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
3500; GCN-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
3501; GCN-NEXT:    v_readfirstlane_b32 s0, v0
3502; GCN-NEXT:    ; return to shader part epilog
3503;
3504; GFX10-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
3505; GFX10:       ; %bb.0:
3506; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
3507; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
3508; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
3509; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
3510; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
3511; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3512; GFX10-NEXT:    s_bfe_u32 s9, s0, 0x80008
3513; GFX10-NEXT:    s_bfe_u32 s11, s1, 0x80008
3514; GFX10-NEXT:    s_lshr_b32 s5, s1, 24
3515; GFX10-NEXT:    s_and_b32 s8, s0, 0xff
3516; GFX10-NEXT:    s_and_b32 s10, s1, 0xff
3517; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x80010
3518; GFX10-NEXT:    s_lshl_b32 s9, s9, 8
3519; GFX10-NEXT:    s_lshl_b32 s11, s11, 8
3520; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
3521; GFX10-NEXT:    s_or_b32 s8, s8, s9
3522; GFX10-NEXT:    s_or_b32 s9, s10, s11
3523; GFX10-NEXT:    s_lshl_b32 s5, s5, 24
3524; GFX10-NEXT:    s_or_b32 s1, s9, s1
3525; GFX10-NEXT:    s_lshr_b32 s4, s0, 24
3526; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x80010
3527; GFX10-NEXT:    s_or_b32 s1, s1, s5
3528; GFX10-NEXT:    s_lshl_b32 s0, s0, 16
3529; GFX10-NEXT:    s_bfe_u32 s13, s2, 0x80008
3530; GFX10-NEXT:    v_mov_b32_e32 v2, s1
3531; GFX10-NEXT:    s_lshl_b32 s4, s4, 24
3532; GFX10-NEXT:    s_or_b32 s0, s8, s0
3533; GFX10-NEXT:    s_lshr_b32 s6, s2, 24
3534; GFX10-NEXT:    s_and_b32 s12, s2, 0xff
3535; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x80010
3536; GFX10-NEXT:    s_lshl_b32 s13, s13, 8
3537; GFX10-NEXT:    s_or_b32 s0, s0, s4
3538; GFX10-NEXT:    s_lshl_b32 s2, s2, 16
3539; GFX10-NEXT:    s_or_b32 s10, s12, s13
3540; GFX10-NEXT:    s_bfe_u32 s5, s3, 0x80008
3541; GFX10-NEXT:    v_cndmask_b32_e32 v2, s0, v2, vcc_lo
3542; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
3543; GFX10-NEXT:    s_or_b32 s2, s10, s2
3544; GFX10-NEXT:    s_lshl_b32 s4, s6, 24
3545; GFX10-NEXT:    s_and_b32 s6, s3, 0xff
3546; GFX10-NEXT:    s_lshl_b32 s5, s5, 8
3547; GFX10-NEXT:    s_bfe_u32 s1, s3, 0x80010
3548; GFX10-NEXT:    s_or_b32 s2, s2, s4
3549; GFX10-NEXT:    s_lshr_b32 s7, s3, 24
3550; GFX10-NEXT:    s_or_b32 s3, s6, s5
3551; GFX10-NEXT:    s_lshl_b32 s1, s1, 16
3552; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s2, vcc_lo
3553; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
3554; GFX10-NEXT:    s_or_b32 s0, s3, s1
3555; GFX10-NEXT:    s_lshl_b32 s1, s7, 24
3556; GFX10-NEXT:    s_or_b32 s3, s0, s1
3557; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s3, vcc_lo
3558; GFX10-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
3559; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
3560; GFX10-NEXT:    ; return to shader part epilog
3561;
3562; GFX11-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
3563; GFX11:       ; %bb.0:
3564; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x0
3565; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
3566; GFX11-NEXT:    v_and_b32_e32 v0, 3, v0
3567; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3568; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
3569; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
3570; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3571; GFX11-NEXT:    s_bfe_u32 s9, s0, 0x80008
3572; GFX11-NEXT:    s_bfe_u32 s11, s1, 0x80008
3573; GFX11-NEXT:    s_lshr_b32 s5, s1, 24
3574; GFX11-NEXT:    s_and_b32 s8, s0, 0xff
3575; GFX11-NEXT:    s_and_b32 s10, s1, 0xff
3576; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x80010
3577; GFX11-NEXT:    s_lshl_b32 s9, s9, 8
3578; GFX11-NEXT:    s_lshl_b32 s11, s11, 8
3579; GFX11-NEXT:    s_lshl_b32 s1, s1, 16
3580; GFX11-NEXT:    s_or_b32 s8, s8, s9
3581; GFX11-NEXT:    s_or_b32 s9, s10, s11
3582; GFX11-NEXT:    s_lshl_b32 s5, s5, 24
3583; GFX11-NEXT:    s_or_b32 s1, s9, s1
3584; GFX11-NEXT:    s_lshr_b32 s4, s0, 24
3585; GFX11-NEXT:    s_bfe_u32 s0, s0, 0x80010
3586; GFX11-NEXT:    s_or_b32 s1, s1, s5
3587; GFX11-NEXT:    s_lshl_b32 s0, s0, 16
3588; GFX11-NEXT:    s_bfe_u32 s13, s2, 0x80008
3589; GFX11-NEXT:    v_mov_b32_e32 v2, s1
3590; GFX11-NEXT:    s_lshl_b32 s4, s4, 24
3591; GFX11-NEXT:    s_or_b32 s0, s8, s0
3592; GFX11-NEXT:    s_lshr_b32 s6, s2, 24
3593; GFX11-NEXT:    s_and_b32 s12, s2, 0xff
3594; GFX11-NEXT:    s_bfe_u32 s2, s2, 0x80010
3595; GFX11-NEXT:    s_lshl_b32 s13, s13, 8
3596; GFX11-NEXT:    s_or_b32 s0, s0, s4
3597; GFX11-NEXT:    s_lshl_b32 s2, s2, 16
3598; GFX11-NEXT:    s_or_b32 s10, s12, s13
3599; GFX11-NEXT:    s_bfe_u32 s5, s3, 0x80008
3600; GFX11-NEXT:    v_cndmask_b32_e32 v2, s0, v2, vcc_lo
3601; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
3602; GFX11-NEXT:    s_or_b32 s2, s10, s2
3603; GFX11-NEXT:    s_lshl_b32 s4, s6, 24
3604; GFX11-NEXT:    s_and_b32 s6, s3, 0xff
3605; GFX11-NEXT:    s_lshl_b32 s5, s5, 8
3606; GFX11-NEXT:    s_bfe_u32 s1, s3, 0x80010
3607; GFX11-NEXT:    s_or_b32 s2, s2, s4
3608; GFX11-NEXT:    s_lshr_b32 s7, s3, 24
3609; GFX11-NEXT:    s_or_b32 s3, s6, s5
3610; GFX11-NEXT:    s_lshl_b32 s1, s1, 16
3611; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s2, vcc_lo
3612; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
3613; GFX11-NEXT:    s_or_b32 s0, s3, s1
3614; GFX11-NEXT:    s_lshl_b32 s1, s7, 24
3615; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
3616; GFX11-NEXT:    s_or_b32 s3, s0, s1
3617; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s3, vcc_lo
3618; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3619; GFX11-NEXT:    v_lshrrev_b32_e32 v0, v0, v1
3620; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
3621; GFX11-NEXT:    ; return to shader part epilog
3622  %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr
3623  %element = extractelement <16 x i8> %vector, i32 %idx
3624  ret i8 %element
3625}
3626
; Constant-index extract of element 0 from a 16 x i8 vector loaded from an
; addrspace(1) pointer.
; On every checked target the code loads all four dwords, then reassembles
; only the first dword (v0) from its four bytes and returns it. No trailing
; shift is expected for index 0.
3627define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) {
3628; GFX9-LABEL: extractelement_vgpr_v16i8_idx0:
3629; GFX9:       ; %bb.0:
3630; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3631; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3632; GFX9-NEXT:    s_waitcnt vmcnt(0)
3633; GFX9-NEXT:    v_mov_b32_e32 v2, 8
3634; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
3635; GFX9-NEXT:    v_mov_b32_e32 v3, 16
3636; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
3637; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3638; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3639; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
3640; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3641; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
3642; GFX9-NEXT:    s_setpc_b64 s[30:31]
3643;
3644; GFX8-LABEL: extractelement_vgpr_v16i8_idx0:
3645; GFX8:       ; %bb.0:
3646; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3647; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3648; GFX8-NEXT:    s_waitcnt vmcnt(0)
3649; GFX8-NEXT:    v_mov_b32_e32 v1, 8
3650; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3651; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3652; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3653; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3654; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3655; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
3656; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3657; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3658; GFX8-NEXT:    s_setpc_b64 s[30:31]
3659;
3660; GFX7-LABEL: extractelement_vgpr_v16i8_idx0:
3661; GFX7:       ; %bb.0:
3662; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3663; GFX7-NEXT:    s_mov_b32 s6, 0
3664; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3665; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3666; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3667; GFX7-NEXT:    s_waitcnt vmcnt(0)
3668; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
3669; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
3670; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
3671; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
3672; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3673; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
3674; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
3675; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
3676; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3677; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3678; GFX7-NEXT:    s_setpc_b64 s[30:31]
3679;
3680; GFX10-LABEL: extractelement_vgpr_v16i8_idx0:
3681; GFX10:       ; %bb.0:
3682; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3683; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3684; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3685; GFX10-NEXT:    s_waitcnt vmcnt(0)
3686; GFX10-NEXT:    v_mov_b32_e32 v1, 8
3687; GFX10-NEXT:    v_mov_b32_e32 v2, 16
3688; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3689; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3690; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3691; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
3692; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3693; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
3694; GFX10-NEXT:    s_setpc_b64 s[30:31]
3695;
3696; GFX11-LABEL: extractelement_vgpr_v16i8_idx0:
3697; GFX11:       ; %bb.0:
3698; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3699; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3700; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
3701; GFX11-NEXT:    s_waitcnt vmcnt(0)
3702; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
3703; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
3704; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3705; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3706; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
3707; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
3708; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
3709; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
3710; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3711; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3712; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
3713; GFX11-NEXT:    s_setpc_b64 s[30:31]
3714  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3715  %element = extractelement <16 x i8> %vector, i32 0
3716  ret i8 %element
3717}
3718
; Constant-index extract of element 1 from a 16 x i8 vector loaded from an
; addrspace(1) pointer.
; On every checked target the code reassembles the first loaded dword (v0)
; from its four bytes and then shifts it right by 8 so that byte 1 lands in
; the low bits of the returned register.
3719define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) {
3720; GFX9-LABEL: extractelement_vgpr_v16i8_idx1:
3721; GFX9:       ; %bb.0:
3722; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3723; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3724; GFX9-NEXT:    s_mov_b32 s4, 8
3725; GFX9-NEXT:    s_waitcnt vmcnt(0)
3726; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
3727; GFX9-NEXT:    v_mov_b32_e32 v2, 16
3728; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3729; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3730; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3731; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v4
3732; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3733; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
3734; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3735; GFX9-NEXT:    s_setpc_b64 s[30:31]
3736;
3737; GFX8-LABEL: extractelement_vgpr_v16i8_idx1:
3738; GFX8:       ; %bb.0:
3739; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3740; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3741; GFX8-NEXT:    s_waitcnt vmcnt(0)
3742; GFX8-NEXT:    v_mov_b32_e32 v1, 8
3743; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3744; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3745; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3746; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3747; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3748; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
3749; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3750; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3751; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3752; GFX8-NEXT:    s_setpc_b64 s[30:31]
3753;
3754; GFX7-LABEL: extractelement_vgpr_v16i8_idx1:
3755; GFX7:       ; %bb.0:
3756; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3757; GFX7-NEXT:    s_mov_b32 s6, 0
3758; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3759; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3760; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3761; GFX7-NEXT:    s_waitcnt vmcnt(0)
3762; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
3763; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
3764; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
3765; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
3766; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3767; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
3768; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
3769; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
3770; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3771; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3772; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3773; GFX7-NEXT:    s_setpc_b64 s[30:31]
3774;
3775; GFX10-LABEL: extractelement_vgpr_v16i8_idx1:
3776; GFX10:       ; %bb.0:
3777; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3778; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3779; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3780; GFX10-NEXT:    s_mov_b32 s4, 8
3781; GFX10-NEXT:    s_waitcnt vmcnt(0)
3782; GFX10-NEXT:    v_mov_b32_e32 v1, 16
3783; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3784; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3785; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3786; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v2
3787; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
3788; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
3789; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3790; GFX10-NEXT:    s_setpc_b64 s[30:31]
3791;
3792; GFX11-LABEL: extractelement_vgpr_v16i8_idx1:
3793; GFX11:       ; %bb.0:
3794; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3795; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3796; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
3797; GFX11-NEXT:    s_waitcnt vmcnt(0)
3798; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
3799; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
3800; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3801; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3802; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
3803; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
3804; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
3805; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
3806; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3807; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3808; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
3809; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
3810; GFX11-NEXT:    s_setpc_b64 s[30:31]
3811  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3812  %element = extractelement <16 x i8> %vector, i32 1
3813  ret i8 %element
3814}
3815
; Constant-index extract of element 2 from a 16 x i8 vector loaded from an
; addrspace(1) pointer.
; On every checked target the code reassembles the first loaded dword (v0)
; from its four bytes and then shifts it right by 16 so that byte 2 lands in
; the low bits of the returned register.
3816define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) {
3817; GFX9-LABEL: extractelement_vgpr_v16i8_idx2:
3818; GFX9:       ; %bb.0:
3819; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3820; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3821; GFX9-NEXT:    s_waitcnt vmcnt(0)
3822; GFX9-NEXT:    v_mov_b32_e32 v2, 8
3823; GFX9-NEXT:    s_mov_b32 s4, 16
3824; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
3825; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3826; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3827; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3828; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
3829; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3830; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
3831; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3832; GFX9-NEXT:    s_setpc_b64 s[30:31]
3833;
3834; GFX8-LABEL: extractelement_vgpr_v16i8_idx2:
3835; GFX8:       ; %bb.0:
3836; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3837; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3838; GFX8-NEXT:    s_waitcnt vmcnt(0)
3839; GFX8-NEXT:    v_mov_b32_e32 v1, 8
3840; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3841; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3842; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3843; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3844; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3845; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
3846; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3847; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3848; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3849; GFX8-NEXT:    s_setpc_b64 s[30:31]
3850;
3851; GFX7-LABEL: extractelement_vgpr_v16i8_idx2:
3852; GFX7:       ; %bb.0:
3853; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3854; GFX7-NEXT:    s_mov_b32 s6, 0
3855; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3856; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3857; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3858; GFX7-NEXT:    s_waitcnt vmcnt(0)
3859; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
3860; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
3861; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
3862; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
3863; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3864; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
3865; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
3866; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
3867; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3868; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3869; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3870; GFX7-NEXT:    s_setpc_b64 s[30:31]
3871;
3872; GFX10-LABEL: extractelement_vgpr_v16i8_idx2:
3873; GFX10:       ; %bb.0:
3874; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3875; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3876; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3877; GFX10-NEXT:    s_waitcnt vmcnt(0)
3878; GFX10-NEXT:    v_mov_b32_e32 v1, 8
3879; GFX10-NEXT:    s_mov_b32 s4, 16
3880; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3881; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v0
3882; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3883; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
3884; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
3885; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
3886; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3887; GFX10-NEXT:    s_setpc_b64 s[30:31]
3888;
3889; GFX11-LABEL: extractelement_vgpr_v16i8_idx2:
3890; GFX11:       ; %bb.0:
3891; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3892; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3893; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
3894; GFX11-NEXT:    s_waitcnt vmcnt(0)
3895; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
3896; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
3897; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3898; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3899; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
3900; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
3901; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
3902; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
3903; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3904; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3905; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
3906; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3907; GFX11-NEXT:    s_setpc_b64 s[30:31]
3908  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
3909  %element = extractelement <16 x i8> %vector, i32 2
3910  ret i8 %element
3911}
3912
; Constant-index extract of element 3 from a 16 x i8 vector loaded from an
; addrspace(1) pointer.
; On every checked target the code reassembles the first loaded dword (v0)
; from its four bytes and then shifts it right by 24 so that byte 3 lands in
; the low bits of the returned register.
3913define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) {
3914; GFX9-LABEL: extractelement_vgpr_v16i8_idx3:
3915; GFX9:       ; %bb.0:
3916; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3917; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3918; GFX9-NEXT:    s_waitcnt vmcnt(0)
3919; GFX9-NEXT:    v_mov_b32_e32 v2, 8
3920; GFX9-NEXT:    v_mov_b32_e32 v1, 0xff
3921; GFX9-NEXT:    v_mov_b32_e32 v3, 16
3922; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
3923; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3924; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3925; GFX9-NEXT:    v_and_or_b32 v0, v0, v1, v2
3926; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
3927; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
3928; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3929; GFX9-NEXT:    s_setpc_b64 s[30:31]
3930;
3931; GFX8-LABEL: extractelement_vgpr_v16i8_idx3:
3932; GFX8:       ; %bb.0:
3933; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3934; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3935; GFX8-NEXT:    s_waitcnt vmcnt(0)
3936; GFX8-NEXT:    v_mov_b32_e32 v1, 8
3937; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3938; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3939; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3940; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3941; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3942; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
3943; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3944; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3945; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3946; GFX8-NEXT:    s_setpc_b64 s[30:31]
3947;
3948; GFX7-LABEL: extractelement_vgpr_v16i8_idx3:
3949; GFX7:       ; %bb.0:
3950; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3951; GFX7-NEXT:    s_mov_b32 s6, 0
3952; GFX7-NEXT:    s_mov_b32 s7, 0xf000
3953; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3954; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
3955; GFX7-NEXT:    s_waitcnt vmcnt(0)
3956; GFX7-NEXT:    v_bfe_u32 v3, v0, 8, 8
3957; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 24, v0
3958; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v0
3959; GFX7-NEXT:    v_bfe_u32 v0, v0, 16, 8
3960; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
3961; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
3962; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
3963; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
3964; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
3965; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
3966; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3967; GFX7-NEXT:    s_setpc_b64 s[30:31]
3968;
3969; GFX10-LABEL: extractelement_vgpr_v16i8_idx3:
3970; GFX10:       ; %bb.0:
3971; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3972; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3973; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
3974; GFX10-NEXT:    s_waitcnt vmcnt(0)
3975; GFX10-NEXT:    v_mov_b32_e32 v1, 8
3976; GFX10-NEXT:    v_mov_b32_e32 v2, 16
3977; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
3978; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3979; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
3980; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
3981; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
3982; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
3983; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
3984; GFX10-NEXT:    s_setpc_b64 s[30:31]
3985;
3986; GFX11-LABEL: extractelement_vgpr_v16i8_idx3:
3987; GFX11:       ; %bb.0:
3988; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3989; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3990; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
3991; GFX11-NEXT:    s_waitcnt vmcnt(0)
3992; GFX11-NEXT:    v_bfe_u32 v1, v0, 8, 8
3993; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 8
3994; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
3995; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3996; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
3997; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
3998; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
3999; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
4000; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4001; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4002; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
4003; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4004; GFX11-NEXT:    s_setpc_b64 s[30:31]
4005  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4006  %element = extractelement <16 x i8> %vector, i32 3
4007  ret i8 %element
4008}
4009
; Test: load a <16 x i8> through an addrspace(1) pointer and return the element
; at constant index 4 as an i8.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script instead of hand-editing.
; Expected code on every checked target loads all 16 bytes into v[0:3] and then
; rebuilds a 32-bit value from the four bytes of v1. No trailing right shift
; follows, which is consistent with index 4 being the lowest byte of v1.
4010define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) {
4011; GFX9-LABEL: extractelement_vgpr_v16i8_idx4:
4012; GFX9:       ; %bb.0:
4013; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4014; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4015; GFX9-NEXT:    s_waitcnt vmcnt(0)
4016; GFX9-NEXT:    v_mov_b32_e32 v2, 8
4017; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4018; GFX9-NEXT:    v_mov_b32_e32 v3, 16
4019; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
4020; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4021; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4022; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
4023; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
4024; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
4025; GFX9-NEXT:    s_setpc_b64 s[30:31]
4026;
4027; GFX8-LABEL: extractelement_vgpr_v16i8_idx4:
4028; GFX8:       ; %bb.0:
4029; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4030; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4031; GFX8-NEXT:    s_waitcnt vmcnt(0)
4032; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4033; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4034; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4035; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4036; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4037; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4038; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4039; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4040; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4041; GFX8-NEXT:    s_setpc_b64 s[30:31]
4042;
4043; GFX7-LABEL: extractelement_vgpr_v16i8_idx4:
4044; GFX7:       ; %bb.0:
4045; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4046; GFX7-NEXT:    s_mov_b32 s6, 0
4047; GFX7-NEXT:    s_mov_b32 s7, 0xf000
4048; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4049; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
4050; GFX7-NEXT:    s_waitcnt vmcnt(0)
4051; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
4052; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
4053; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
4054; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
4055; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
4056; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4057; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
4058; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
4059; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
4060; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
4061; GFX7-NEXT:    s_setpc_b64 s[30:31]
4062;
4063; GFX10-LABEL: extractelement_vgpr_v16i8_idx4:
4064; GFX10:       ; %bb.0:
4065; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4066; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4067; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4068; GFX10-NEXT:    s_waitcnt vmcnt(0)
4069; GFX10-NEXT:    v_mov_b32_e32 v0, 8
4070; GFX10-NEXT:    v_mov_b32_e32 v2, 16
4071; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4072; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4073; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4074; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
4075; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4076; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
4077; GFX10-NEXT:    s_setpc_b64 s[30:31]
4078;
4079; GFX11-LABEL: extractelement_vgpr_v16i8_idx4:
4080; GFX11:       ; %bb.0:
4081; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4082; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4083; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4084; GFX11-NEXT:    s_waitcnt vmcnt(0)
4085; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
4086; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
4087; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4088; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4089; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
4090; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
4091; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
4092; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
4093; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4094; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4095; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
4096; GFX11-NEXT:    s_setpc_b64 s[30:31]
4097  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4098  %element = extractelement <16 x i8> %vector, i32 4
4099  ret i8 %element
4100}
4101
; Test: load a <16 x i8> through an addrspace(1) pointer and return the element
; at constant index 5 as an i8.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script instead of hand-editing.
; Expected code on every checked target loads all 16 bytes into v[0:3],
; rebuilds a 32-bit value from the four bytes of v1, and finishes with a
; logical right shift by 8 before returning.
4102define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) {
4103; GFX9-LABEL: extractelement_vgpr_v16i8_idx5:
4104; GFX9:       ; %bb.0:
4105; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4106; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4107; GFX9-NEXT:    s_mov_b32 s4, 8
4108; GFX9-NEXT:    s_waitcnt vmcnt(0)
4109; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4110; GFX9-NEXT:    v_mov_b32_e32 v2, 16
4111; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4112; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4113; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4114; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v4
4115; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4116; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
4117; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4118; GFX9-NEXT:    s_setpc_b64 s[30:31]
4119;
4120; GFX8-LABEL: extractelement_vgpr_v16i8_idx5:
4121; GFX8:       ; %bb.0:
4122; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4123; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4124; GFX8-NEXT:    s_waitcnt vmcnt(0)
4125; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4126; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4127; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4128; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4129; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4130; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4131; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4132; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4133; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4134; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4135; GFX8-NEXT:    s_setpc_b64 s[30:31]
4136;
4137; GFX7-LABEL: extractelement_vgpr_v16i8_idx5:
4138; GFX7:       ; %bb.0:
4139; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4140; GFX7-NEXT:    s_mov_b32 s6, 0
4141; GFX7-NEXT:    s_mov_b32 s7, 0xf000
4142; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4143; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
4144; GFX7-NEXT:    s_waitcnt vmcnt(0)
4145; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
4146; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
4147; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
4148; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
4149; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
4150; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4151; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
4152; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
4153; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
4154; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
4155; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4156; GFX7-NEXT:    s_setpc_b64 s[30:31]
4157;
4158; GFX10-LABEL: extractelement_vgpr_v16i8_idx5:
4159; GFX10:       ; %bb.0:
4160; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4161; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4162; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4163; GFX10-NEXT:    s_mov_b32 s4, 8
4164; GFX10-NEXT:    s_waitcnt vmcnt(0)
4165; GFX10-NEXT:    v_mov_b32_e32 v0, 16
4166; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4167; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4168; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4169; GFX10-NEXT:    v_and_or_b32 v1, 0xff, v1, v2
4170; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
4171; GFX10-NEXT:    v_or3_b32 v0, v1, v0, v2
4172; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4173; GFX10-NEXT:    s_setpc_b64 s[30:31]
4174;
4175; GFX11-LABEL: extractelement_vgpr_v16i8_idx5:
4176; GFX11:       ; %bb.0:
4177; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4178; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4179; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4180; GFX11-NEXT:    s_waitcnt vmcnt(0)
4181; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
4182; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
4183; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4184; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4185; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
4186; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
4187; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
4188; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
4189; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4190; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4191; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
4192; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4193; GFX11-NEXT:    s_setpc_b64 s[30:31]
4194  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4195  %element = extractelement <16 x i8> %vector, i32 5
4196  ret i8 %element
4197}
4198
; Test: load a <16 x i8> through an addrspace(1) pointer and return the element
; at constant index 6 as an i8.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script instead of hand-editing.
; Expected code on every checked target loads all 16 bytes into v[0:3],
; rebuilds a 32-bit value from the four bytes of v1, and finishes with a
; logical right shift by 16 before returning.
4199define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) {
4200; GFX9-LABEL: extractelement_vgpr_v16i8_idx6:
4201; GFX9:       ; %bb.0:
4202; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4203; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4204; GFX9-NEXT:    s_waitcnt vmcnt(0)
4205; GFX9-NEXT:    v_mov_b32_e32 v2, 8
4206; GFX9-NEXT:    s_mov_b32 s4, 16
4207; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4208; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4209; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4210; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4211; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
4212; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4213; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
4214; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4215; GFX9-NEXT:    s_setpc_b64 s[30:31]
4216;
4217; GFX8-LABEL: extractelement_vgpr_v16i8_idx6:
4218; GFX8:       ; %bb.0:
4219; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4220; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4221; GFX8-NEXT:    s_waitcnt vmcnt(0)
4222; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4223; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4224; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4225; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4226; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4227; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4228; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4229; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4230; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4231; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4232; GFX8-NEXT:    s_setpc_b64 s[30:31]
4233;
4234; GFX7-LABEL: extractelement_vgpr_v16i8_idx6:
4235; GFX7:       ; %bb.0:
4236; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4237; GFX7-NEXT:    s_mov_b32 s6, 0
4238; GFX7-NEXT:    s_mov_b32 s7, 0xf000
4239; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4240; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
4241; GFX7-NEXT:    s_waitcnt vmcnt(0)
4242; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
4243; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
4244; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
4245; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
4246; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
4247; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4248; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
4249; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
4250; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
4251; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
4252; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4253; GFX7-NEXT:    s_setpc_b64 s[30:31]
4254;
4255; GFX10-LABEL: extractelement_vgpr_v16i8_idx6:
4256; GFX10:       ; %bb.0:
4257; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4258; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4259; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4260; GFX10-NEXT:    s_waitcnt vmcnt(0)
4261; GFX10-NEXT:    v_mov_b32_e32 v0, 8
4262; GFX10-NEXT:    s_mov_b32 s4, 16
4263; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4264; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v1
4265; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4266; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
4267; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
4268; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
4269; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4270; GFX10-NEXT:    s_setpc_b64 s[30:31]
4271;
4272; GFX11-LABEL: extractelement_vgpr_v16i8_idx6:
4273; GFX11:       ; %bb.0:
4274; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4275; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4276; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4277; GFX11-NEXT:    s_waitcnt vmcnt(0)
4278; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
4279; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
4280; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4281; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4282; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
4283; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
4284; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
4285; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
4286; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4287; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4288; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
4289; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4290; GFX11-NEXT:    s_setpc_b64 s[30:31]
4291  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4292  %element = extractelement <16 x i8> %vector, i32 6
4293  ret i8 %element
4294}
4295
; Test: load a <16 x i8> through an addrspace(1) pointer and return the element
; at constant index 7 as an i8.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script instead of hand-editing.
; Expected code on every checked target loads all 16 bytes into v[0:3],
; rebuilds a 32-bit value from the four bytes of v1, and finishes with a
; logical right shift by 24 before returning.
4296define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) {
4297; GFX9-LABEL: extractelement_vgpr_v16i8_idx7:
4298; GFX9:       ; %bb.0:
4299; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4300; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4301; GFX9-NEXT:    s_waitcnt vmcnt(0)
4302; GFX9-NEXT:    v_mov_b32_e32 v2, 8
4303; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4304; GFX9-NEXT:    v_mov_b32_e32 v3, 16
4305; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v1
4306; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4307; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4308; GFX9-NEXT:    v_and_or_b32 v0, v1, v0, v2
4309; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
4310; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
4311; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4312; GFX9-NEXT:    s_setpc_b64 s[30:31]
4313;
4314; GFX8-LABEL: extractelement_vgpr_v16i8_idx7:
4315; GFX8:       ; %bb.0:
4316; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4317; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4318; GFX8-NEXT:    s_waitcnt vmcnt(0)
4319; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4320; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4321; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4322; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4323; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4324; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4325; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4326; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4327; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4328; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4329; GFX8-NEXT:    s_setpc_b64 s[30:31]
4330;
4331; GFX7-LABEL: extractelement_vgpr_v16i8_idx7:
4332; GFX7:       ; %bb.0:
4333; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4334; GFX7-NEXT:    s_mov_b32 s6, 0
4335; GFX7-NEXT:    s_mov_b32 s7, 0xf000
4336; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4337; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
4338; GFX7-NEXT:    s_waitcnt vmcnt(0)
4339; GFX7-NEXT:    v_bfe_u32 v3, v1, 8, 8
4340; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v1
4341; GFX7-NEXT:    v_and_b32_e32 v2, 0xff, v1
4342; GFX7-NEXT:    v_bfe_u32 v1, v1, 16, 8
4343; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
4344; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4345; GFX7-NEXT:    v_or_b32_e32 v2, v2, v3
4346; GFX7-NEXT:    v_or_b32_e32 v1, v2, v1
4347; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
4348; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
4349; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4350; GFX7-NEXT:    s_setpc_b64 s[30:31]
4351;
4352; GFX10-LABEL: extractelement_vgpr_v16i8_idx7:
4353; GFX10:       ; %bb.0:
4354; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4355; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4356; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4357; GFX10-NEXT:    s_waitcnt vmcnt(0)
4358; GFX10-NEXT:    v_mov_b32_e32 v0, 8
4359; GFX10-NEXT:    v_mov_b32_e32 v2, 16
4360; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4361; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4362; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4363; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
4364; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4365; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
4366; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4367; GFX10-NEXT:    s_setpc_b64 s[30:31]
4368;
4369; GFX11-LABEL: extractelement_vgpr_v16i8_idx7:
4370; GFX11:       ; %bb.0:
4371; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4372; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4373; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4374; GFX11-NEXT:    s_waitcnt vmcnt(0)
4375; GFX11-NEXT:    v_bfe_u32 v0, v1, 8, 8
4376; GFX11-NEXT:    v_bfe_u32 v2, v1, 16, 8
4377; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v1
4378; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4379; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
4380; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
4381; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
4382; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v1, v0
4383; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4384; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4385; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v1
4386; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4387; GFX11-NEXT:    s_setpc_b64 s[30:31]
4388  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4389  %element = extractelement <16 x i8> %vector, i32 7
4390  ret i8 %element
4391}
4392
; Test: load a <16 x i8> through an addrspace(1) pointer and return the element
; at constant index 8 as an i8.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script instead of hand-editing.
; Expected code on every checked target loads all 16 bytes into v[0:3] and then
; rebuilds a 32-bit value from the four bytes of v2. No trailing right shift
; follows, which is consistent with index 8 being the lowest byte of v2.
4393define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) {
4394; GFX9-LABEL: extractelement_vgpr_v16i8_idx8:
4395; GFX9:       ; %bb.0:
4396; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4397; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4398; GFX9-NEXT:    s_waitcnt vmcnt(0)
4399; GFX9-NEXT:    v_mov_b32_e32 v1, 8
4400; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4401; GFX9-NEXT:    v_mov_b32_e32 v3, 16
4402; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v2
4403; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4404; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4405; GFX9-NEXT:    v_and_or_b32 v0, v2, v0, v1
4406; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
4407; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
4408; GFX9-NEXT:    s_setpc_b64 s[30:31]
4409;
4410; GFX8-LABEL: extractelement_vgpr_v16i8_idx8:
4411; GFX8:       ; %bb.0:
4412; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4413; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4414; GFX8-NEXT:    s_waitcnt vmcnt(0)
4415; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4416; GFX8-NEXT:    v_mov_b32_e32 v1, 16
4417; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4418; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4419; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4420; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4421; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4422; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4423; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4424; GFX8-NEXT:    s_setpc_b64 s[30:31]
4425;
4426; GFX7-LABEL: extractelement_vgpr_v16i8_idx8:
4427; GFX7:       ; %bb.0:
4428; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4429; GFX7-NEXT:    s_mov_b32 s6, 0
4430; GFX7-NEXT:    s_mov_b32 s7, 0xf000
4431; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4432; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
4433; GFX7-NEXT:    s_waitcnt vmcnt(0)
4434; GFX7-NEXT:    v_bfe_u32 v3, v2, 8, 8
4435; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v2
4436; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v2
4437; GFX7-NEXT:    v_bfe_u32 v2, v2, 16, 8
4438; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
4439; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
4440; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
4441; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
4442; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
4443; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
4444; GFX7-NEXT:    s_setpc_b64 s[30:31]
4445;
4446; GFX10-LABEL: extractelement_vgpr_v16i8_idx8:
4447; GFX10:       ; %bb.0:
4448; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4449; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4450; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4451; GFX10-NEXT:    s_waitcnt vmcnt(0)
4452; GFX10-NEXT:    v_mov_b32_e32 v0, 8
4453; GFX10-NEXT:    v_mov_b32_e32 v1, 16
4454; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4455; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4456; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4457; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v2, v0
4458; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
4459; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
4460; GFX10-NEXT:    s_setpc_b64 s[30:31]
4461;
4462; GFX11-LABEL: extractelement_vgpr_v16i8_idx8:
4463; GFX11:       ; %bb.0:
4464; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4465; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4466; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4467; GFX11-NEXT:    s_waitcnt vmcnt(0)
4468; GFX11-NEXT:    v_bfe_u32 v0, v2, 8, 8
4469; GFX11-NEXT:    v_bfe_u32 v1, v2, 16, 8
4470; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4471; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4472; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
4473; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4474; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
4475; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v2, v0
4476; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
4477; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4478; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
4479; GFX11-NEXT:    s_setpc_b64 s[30:31]
4480  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4481  %element = extractelement <16 x i8> %vector, i32 8
4482  ret i8 %element
4483}
4484
; Test: load a <16 x i8> through an addrspace(1) pointer and return the element
; at constant index 9 as an i8.
; The check lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script instead of hand-editing.
; Expected code on every checked target loads all 16 bytes into v[0:3],
; rebuilds a 32-bit value from the four bytes of v2, and finishes with a
; logical right shift by 8 before returning.
4485define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) {
4486; GFX9-LABEL: extractelement_vgpr_v16i8_idx9:
4487; GFX9:       ; %bb.0:
4488; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4489; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4490; GFX9-NEXT:    s_mov_b32 s4, 8
4491; GFX9-NEXT:    s_waitcnt vmcnt(0)
4492; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4493; GFX9-NEXT:    v_mov_b32_e32 v1, 16
4494; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4495; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4496; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4497; GFX9-NEXT:    v_and_or_b32 v0, v2, v0, v4
4498; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
4499; GFX9-NEXT:    v_or3_b32 v0, v0, v1, v2
4500; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4501; GFX9-NEXT:    s_setpc_b64 s[30:31]
4502;
4503; GFX8-LABEL: extractelement_vgpr_v16i8_idx9:
4504; GFX8:       ; %bb.0:
4505; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4506; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4507; GFX8-NEXT:    s_waitcnt vmcnt(0)
4508; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4509; GFX8-NEXT:    v_mov_b32_e32 v1, 16
4510; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4511; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4512; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4513; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4514; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4515; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4516; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4517; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4518; GFX8-NEXT:    s_setpc_b64 s[30:31]
4519;
4520; GFX7-LABEL: extractelement_vgpr_v16i8_idx9:
4521; GFX7:       ; %bb.0:
4522; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4523; GFX7-NEXT:    s_mov_b32 s6, 0
4524; GFX7-NEXT:    s_mov_b32 s7, 0xf000
4525; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4526; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
4527; GFX7-NEXT:    s_waitcnt vmcnt(0)
4528; GFX7-NEXT:    v_bfe_u32 v3, v2, 8, 8
4529; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v2
4530; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v2
4531; GFX7-NEXT:    v_bfe_u32 v2, v2, 16, 8
4532; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
4533; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
4534; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
4535; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
4536; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
4537; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
4538; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4539; GFX7-NEXT:    s_setpc_b64 s[30:31]
4540;
4541; GFX10-LABEL: extractelement_vgpr_v16i8_idx9:
4542; GFX10:       ; %bb.0:
4543; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4544; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4545; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4546; GFX10-NEXT:    s_mov_b32 s4, 8
4547; GFX10-NEXT:    s_waitcnt vmcnt(0)
4548; GFX10-NEXT:    v_mov_b32_e32 v0, 16
4549; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4550; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4551; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4552; GFX10-NEXT:    v_and_or_b32 v1, 0xff, v2, v1
4553; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
4554; GFX10-NEXT:    v_or3_b32 v0, v1, v0, v2
4555; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4556; GFX10-NEXT:    s_setpc_b64 s[30:31]
4557;
4558; GFX11-LABEL: extractelement_vgpr_v16i8_idx9:
4559; GFX11:       ; %bb.0:
4560; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4561; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4562; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4563; GFX11-NEXT:    s_waitcnt vmcnt(0)
4564; GFX11-NEXT:    v_bfe_u32 v0, v2, 8, 8
4565; GFX11-NEXT:    v_bfe_u32 v1, v2, 16, 8
4566; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4567; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4568; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
4569; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4570; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
4571; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v2, v0
4572; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
4573; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4574; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
4575; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4576; GFX11-NEXT:    s_setpc_b64 s[30:31]
4577  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4578  %element = extractelement <16 x i8> %vector, i32 9
4579  ret i8 %element
4580}
4581
; Constant-index extract: loads a <16 x i8> through an addrspace(1) pointer and
; returns element 10. Element 10 is byte 2 of the third loaded dword, so on
; every target the expected code works on v2 and finishes with a logical right
; shift by 16.
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4582define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) {
4583; GFX9-LABEL: extractelement_vgpr_v16i8_idx10:
4584; GFX9:       ; %bb.0:
4585; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4586; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4587; GFX9-NEXT:    s_waitcnt vmcnt(0)
4588; GFX9-NEXT:    v_mov_b32_e32 v1, 8
4589; GFX9-NEXT:    s_mov_b32 s4, 16
4590; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4591; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4592; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4593; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4594; GFX9-NEXT:    v_and_or_b32 v0, v2, v0, v1
4595; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4596; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
4597; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4598; GFX9-NEXT:    s_setpc_b64 s[30:31]
4599;
4600; GFX8-LABEL: extractelement_vgpr_v16i8_idx10:
4601; GFX8:       ; %bb.0:
4602; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4603; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4604; GFX8-NEXT:    s_waitcnt vmcnt(0)
4605; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4606; GFX8-NEXT:    v_mov_b32_e32 v1, 16
4607; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4608; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4609; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4610; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4611; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4612; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4613; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4614; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4615; GFX8-NEXT:    s_setpc_b64 s[30:31]
4616;
4617; GFX7-LABEL: extractelement_vgpr_v16i8_idx10:
4618; GFX7:       ; %bb.0:
4619; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4620; GFX7-NEXT:    s_mov_b32 s6, 0
4621; GFX7-NEXT:    s_mov_b32 s7, 0xf000
4622; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4623; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
4624; GFX7-NEXT:    s_waitcnt vmcnt(0)
4625; GFX7-NEXT:    v_bfe_u32 v3, v2, 8, 8
4626; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v2
4627; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v2
4628; GFX7-NEXT:    v_bfe_u32 v2, v2, 16, 8
4629; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
4630; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
4631; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
4632; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
4633; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
4634; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
4635; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4636; GFX7-NEXT:    s_setpc_b64 s[30:31]
4637;
4638; GFX10-LABEL: extractelement_vgpr_v16i8_idx10:
4639; GFX10:       ; %bb.0:
4640; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4641; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4642; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4643; GFX10-NEXT:    s_waitcnt vmcnt(0)
4644; GFX10-NEXT:    v_mov_b32_e32 v0, 8
4645; GFX10-NEXT:    s_mov_b32 s4, 16
4646; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4647; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 24, v2
4648; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4649; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v2, v0
4650; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
4651; GFX10-NEXT:    v_or3_b32 v0, v0, v3, v1
4652; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4653; GFX10-NEXT:    s_setpc_b64 s[30:31]
4654;
4655; GFX11-LABEL: extractelement_vgpr_v16i8_idx10:
4656; GFX11:       ; %bb.0:
4657; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4658; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4659; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4660; GFX11-NEXT:    s_waitcnt vmcnt(0)
4661; GFX11-NEXT:    v_bfe_u32 v0, v2, 8, 8
4662; GFX11-NEXT:    v_bfe_u32 v1, v2, 16, 8
4663; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4664; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4665; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
4666; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4667; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
4668; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v2, v0
4669; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
4670; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4671; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
4672; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4673; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: whole-vector load followed by an extract at immediate index 10.
4674  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4675  %element = extractelement <16 x i8> %vector, i32 10
4676  ret i8 %element
4677}
4678
; Constant-index extract: loads a <16 x i8> through an addrspace(1) pointer and
; returns element 11. Element 11 is byte 3 (the top byte) of the third loaded
; dword, so on every target the expected code works on v2 and finishes with a
; logical right shift by 24.
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4679define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) {
4680; GFX9-LABEL: extractelement_vgpr_v16i8_idx11:
4681; GFX9:       ; %bb.0:
4682; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4683; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4684; GFX9-NEXT:    s_waitcnt vmcnt(0)
4685; GFX9-NEXT:    v_mov_b32_e32 v1, 8
4686; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4687; GFX9-NEXT:    v_mov_b32_e32 v3, 16
4688; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v2
4689; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4690; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4691; GFX9-NEXT:    v_and_or_b32 v0, v2, v0, v1
4692; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
4693; GFX9-NEXT:    v_or3_b32 v0, v0, v3, v1
4694; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4695; GFX9-NEXT:    s_setpc_b64 s[30:31]
4696;
4697; GFX8-LABEL: extractelement_vgpr_v16i8_idx11:
4698; GFX8:       ; %bb.0:
4699; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4700; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4701; GFX8-NEXT:    s_waitcnt vmcnt(0)
4702; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4703; GFX8-NEXT:    v_mov_b32_e32 v1, 16
4704; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4705; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4706; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4707; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4708; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4709; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
4710; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4711; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4712; GFX8-NEXT:    s_setpc_b64 s[30:31]
4713;
4714; GFX7-LABEL: extractelement_vgpr_v16i8_idx11:
4715; GFX7:       ; %bb.0:
4716; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4717; GFX7-NEXT:    s_mov_b32 s6, 0
4718; GFX7-NEXT:    s_mov_b32 s7, 0xf000
4719; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4720; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
4721; GFX7-NEXT:    s_waitcnt vmcnt(0)
4722; GFX7-NEXT:    v_bfe_u32 v3, v2, 8, 8
4723; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v2
4724; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v2
4725; GFX7-NEXT:    v_bfe_u32 v2, v2, 16, 8
4726; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
4727; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
4728; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
4729; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
4730; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
4731; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
4732; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4733; GFX7-NEXT:    s_setpc_b64 s[30:31]
4734;
4735; GFX10-LABEL: extractelement_vgpr_v16i8_idx11:
4736; GFX10:       ; %bb.0:
4737; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4738; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4739; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4740; GFX10-NEXT:    s_waitcnt vmcnt(0)
4741; GFX10-NEXT:    v_mov_b32_e32 v0, 8
4742; GFX10-NEXT:    v_mov_b32_e32 v1, 16
4743; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4744; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4745; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4746; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v2, v0
4747; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
4748; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
4749; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4750; GFX10-NEXT:    s_setpc_b64 s[30:31]
4751;
4752; GFX11-LABEL: extractelement_vgpr_v16i8_idx11:
4753; GFX11:       ; %bb.0:
4754; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4755; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4756; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4757; GFX11-NEXT:    s_waitcnt vmcnt(0)
4758; GFX11-NEXT:    v_bfe_u32 v0, v2, 8, 8
4759; GFX11-NEXT:    v_bfe_u32 v1, v2, 16, 8
4760; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v2
4761; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4762; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
4763; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4764; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
4765; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v2, v0
4766; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
4767; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4768; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
4769; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
4770; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: whole-vector load followed by an extract at immediate index 11.
4771  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4772  %element = extractelement <16 x i8> %vector, i32 11
4773  ret i8 %element
4774}
4775
; Constant-index extract: loads a <16 x i8> through an addrspace(1) pointer and
; returns element 12. Element 12 is byte 0 of the fourth loaded dword, so on
; every target the expected code works on v3 and, unlike the neighbouring
; index tests, has no trailing right shift before the return.
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4776define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) {
4777; GFX9-LABEL: extractelement_vgpr_v16i8_idx12:
4778; GFX9:       ; %bb.0:
4779; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4780; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4781; GFX9-NEXT:    s_waitcnt vmcnt(0)
4782; GFX9-NEXT:    v_mov_b32_e32 v1, 8
4783; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4784; GFX9-NEXT:    v_mov_b32_e32 v2, 16
4785; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
4786; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4787; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4788; GFX9-NEXT:    v_and_or_b32 v0, v3, v0, v1
4789; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
4790; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
4791; GFX9-NEXT:    s_setpc_b64 s[30:31]
4792;
4793; GFX8-LABEL: extractelement_vgpr_v16i8_idx12:
4794; GFX8:       ; %bb.0:
4795; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4796; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4797; GFX8-NEXT:    s_waitcnt vmcnt(0)
4798; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4799; GFX8-NEXT:    v_mov_b32_e32 v1, 16
4800; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4801; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
4802; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4803; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4804; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4805; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
4806; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4807; GFX8-NEXT:    s_setpc_b64 s[30:31]
4808;
4809; GFX7-LABEL: extractelement_vgpr_v16i8_idx12:
4810; GFX7:       ; %bb.0:
4811; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4812; GFX7-NEXT:    s_mov_b32 s6, 0
4813; GFX7-NEXT:    s_mov_b32 s7, 0xf000
4814; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4815; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
4816; GFX7-NEXT:    s_waitcnt vmcnt(0)
4817; GFX7-NEXT:    v_bfe_u32 v2, v3, 8, 8
4818; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v3
4819; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v3
4820; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
4821; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
4822; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
4823; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
4824; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
4825; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
4826; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
4827; GFX7-NEXT:    s_setpc_b64 s[30:31]
4828;
4829; GFX10-LABEL: extractelement_vgpr_v16i8_idx12:
4830; GFX10:       ; %bb.0:
4831; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4832; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4833; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4834; GFX10-NEXT:    s_waitcnt vmcnt(0)
4835; GFX10-NEXT:    v_mov_b32_e32 v0, 8
4836; GFX10-NEXT:    v_mov_b32_e32 v1, 16
4837; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4838; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
4839; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4840; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v3, v0
4841; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
4842; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
4843; GFX10-NEXT:    s_setpc_b64 s[30:31]
4844;
4845; GFX11-LABEL: extractelement_vgpr_v16i8_idx12:
4846; GFX11:       ; %bb.0:
4847; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4848; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4849; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4850; GFX11-NEXT:    s_waitcnt vmcnt(0)
4851; GFX11-NEXT:    v_bfe_u32 v0, v3, 8, 8
4852; GFX11-NEXT:    v_bfe_u32 v1, v3, 16, 8
4853; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
4854; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4855; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
4856; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4857; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4858; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
4859; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v3, v0
4860; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4861; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
4862; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: whole-vector load followed by an extract at immediate index 12.
4863  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4864  %element = extractelement <16 x i8> %vector, i32 12
4865  ret i8 %element
4866}
4867
; Constant-index extract: loads a <16 x i8> through an addrspace(1) pointer and
; returns element 13. Element 13 is byte 1 of the fourth loaded dword, so on
; every target the expected code works on v3 and finishes with a logical right
; shift by 8.
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4868define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) {
4869; GFX9-LABEL: extractelement_vgpr_v16i8_idx13:
4870; GFX9:       ; %bb.0:
4871; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4872; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4873; GFX9-NEXT:    s_mov_b32 s4, 8
4874; GFX9-NEXT:    s_waitcnt vmcnt(0)
4875; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4876; GFX9-NEXT:    v_mov_b32_e32 v1, 16
4877; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
4878; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4879; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4880; GFX9-NEXT:    v_and_or_b32 v0, v3, v0, v4
4881; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
4882; GFX9-NEXT:    v_or3_b32 v0, v0, v1, v2
4883; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4884; GFX9-NEXT:    s_setpc_b64 s[30:31]
4885;
4886; GFX8-LABEL: extractelement_vgpr_v16i8_idx13:
4887; GFX8:       ; %bb.0:
4888; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4889; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4890; GFX8-NEXT:    s_waitcnt vmcnt(0)
4891; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4892; GFX8-NEXT:    v_mov_b32_e32 v1, 16
4893; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4894; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
4895; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4896; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4897; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4898; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
4899; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4900; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4901; GFX8-NEXT:    s_setpc_b64 s[30:31]
4902;
4903; GFX7-LABEL: extractelement_vgpr_v16i8_idx13:
4904; GFX7:       ; %bb.0:
4905; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4906; GFX7-NEXT:    s_mov_b32 s6, 0
4907; GFX7-NEXT:    s_mov_b32 s7, 0xf000
4908; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4909; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
4910; GFX7-NEXT:    s_waitcnt vmcnt(0)
4911; GFX7-NEXT:    v_bfe_u32 v2, v3, 8, 8
4912; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v3
4913; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v3
4914; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
4915; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
4916; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
4917; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
4918; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
4919; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
4920; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
4921; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4922; GFX7-NEXT:    s_setpc_b64 s[30:31]
4923;
4924; GFX10-LABEL: extractelement_vgpr_v16i8_idx13:
4925; GFX10:       ; %bb.0:
4926; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4927; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4928; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4929; GFX10-NEXT:    s_mov_b32 s4, 8
4930; GFX10-NEXT:    s_waitcnt vmcnt(0)
4931; GFX10-NEXT:    v_mov_b32_e32 v0, 16
4932; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4933; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
4934; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4935; GFX10-NEXT:    v_and_or_b32 v1, 0xff, v3, v1
4936; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
4937; GFX10-NEXT:    v_or3_b32 v0, v1, v0, v2
4938; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4939; GFX10-NEXT:    s_setpc_b64 s[30:31]
4940;
4941; GFX11-LABEL: extractelement_vgpr_v16i8_idx13:
4942; GFX11:       ; %bb.0:
4943; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4944; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4945; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
4946; GFX11-NEXT:    s_waitcnt vmcnt(0)
4947; GFX11-NEXT:    v_bfe_u32 v0, v3, 8, 8
4948; GFX11-NEXT:    v_bfe_u32 v1, v3, 16, 8
4949; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
4950; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4951; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
4952; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4953; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4954; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
4955; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v3, v0
4956; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4957; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
4958; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 8, v0
4959; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: whole-vector load followed by an extract at immediate index 13.
4960  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
4961  %element = extractelement <16 x i8> %vector, i32 13
4962  ret i8 %element
4963}
4964
; Constant-index extract: loads a <16 x i8> through an addrspace(1) pointer and
; returns element 14. Element 14 is byte 2 of the fourth loaded dword, so on
; every target the expected code works on v3 and finishes with a logical right
; shift by 16.
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4965define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) {
4966; GFX9-LABEL: extractelement_vgpr_v16i8_idx14:
4967; GFX9:       ; %bb.0:
4968; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4969; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
4970; GFX9-NEXT:    s_waitcnt vmcnt(0)
4971; GFX9-NEXT:    v_mov_b32_e32 v1, 8
4972; GFX9-NEXT:    s_mov_b32 s4, 16
4973; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
4974; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
4975; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4976; GFX9-NEXT:    v_lshlrev_b32_sdwa v4, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4977; GFX9-NEXT:    v_and_or_b32 v0, v3, v0, v1
4978; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
4979; GFX9-NEXT:    v_or3_b32 v0, v0, v4, v1
4980; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4981; GFX9-NEXT:    s_setpc_b64 s[30:31]
4982;
4983; GFX8-LABEL: extractelement_vgpr_v16i8_idx14:
4984; GFX8:       ; %bb.0:
4985; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4986; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4987; GFX8-NEXT:    s_waitcnt vmcnt(0)
4988; GFX8-NEXT:    v_mov_b32_e32 v0, 8
4989; GFX8-NEXT:    v_mov_b32_e32 v1, 16
4990; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
4991; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
4992; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
4993; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4994; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4995; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
4996; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4997; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4998; GFX8-NEXT:    s_setpc_b64 s[30:31]
4999;
5000; GFX7-LABEL: extractelement_vgpr_v16i8_idx14:
5001; GFX7:       ; %bb.0:
5002; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5003; GFX7-NEXT:    s_mov_b32 s6, 0
5004; GFX7-NEXT:    s_mov_b32 s7, 0xf000
5005; GFX7-NEXT:    s_mov_b64 s[4:5], 0
5006; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
5007; GFX7-NEXT:    s_waitcnt vmcnt(0)
5008; GFX7-NEXT:    v_bfe_u32 v2, v3, 8, 8
5009; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v3
5010; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v3
5011; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
5012; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
5013; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
5014; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
5015; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
5016; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
5017; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
5018; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5019; GFX7-NEXT:    s_setpc_b64 s[30:31]
5020;
5021; GFX10-LABEL: extractelement_vgpr_v16i8_idx14:
5022; GFX10:       ; %bb.0:
5023; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5024; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5025; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
5026; GFX10-NEXT:    s_waitcnt vmcnt(0)
5027; GFX10-NEXT:    v_mov_b32_e32 v0, 8
5028; GFX10-NEXT:    s_mov_b32 s4, 16
5029; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
5030; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 24, v3
5031; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
5032; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v3, v0
5033; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
5034; GFX10-NEXT:    v_or3_b32 v0, v0, v2, v1
5035; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5036; GFX10-NEXT:    s_setpc_b64 s[30:31]
5037;
5038; GFX11-LABEL: extractelement_vgpr_v16i8_idx14:
5039; GFX11:       ; %bb.0:
5040; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5041; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5042; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
5043; GFX11-NEXT:    s_waitcnt vmcnt(0)
5044; GFX11-NEXT:    v_bfe_u32 v0, v3, 8, 8
5045; GFX11-NEXT:    v_bfe_u32 v1, v3, 16, 8
5046; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
5047; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
5048; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
5049; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
5050; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
5051; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
5052; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v3, v0
5053; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5054; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
5055; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5056; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: whole-vector load followed by an extract at immediate index 14.
5057  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
5058  %element = extractelement <16 x i8> %vector, i32 14
5059  ret i8 %element
5060}
5061
; Constant-index extract: loads a <16 x i8> through an addrspace(1) pointer and
; returns element 15, the last element of the vector. Element 15 is byte 3
; (the top byte) of the fourth loaded dword, so on every target the expected
; code works on v3 and finishes with a logical right shift by 24.
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5062define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) {
5063; GFX9-LABEL: extractelement_vgpr_v16i8_idx15:
5064; GFX9:       ; %bb.0:
5065; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5066; GFX9-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
5067; GFX9-NEXT:    s_waitcnt vmcnt(0)
5068; GFX9-NEXT:    v_mov_b32_e32 v1, 8
5069; GFX9-NEXT:    v_mov_b32_e32 v0, 0xff
5070; GFX9-NEXT:    v_mov_b32_e32 v2, 16
5071; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v3
5072; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
5073; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
5074; GFX9-NEXT:    v_and_or_b32 v0, v3, v0, v1
5075; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 24, v4
5076; GFX9-NEXT:    v_or3_b32 v0, v0, v2, v1
5077; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
5078; GFX9-NEXT:    s_setpc_b64 s[30:31]
5079;
5080; GFX8-LABEL: extractelement_vgpr_v16i8_idx15:
5081; GFX8:       ; %bb.0:
5082; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5083; GFX8-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
5084; GFX8-NEXT:    s_waitcnt vmcnt(0)
5085; GFX8-NEXT:    v_mov_b32_e32 v0, 8
5086; GFX8-NEXT:    v_mov_b32_e32 v1, 16
5087; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
5088; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
5089; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
5090; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
5091; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
5092; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
5093; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
5094; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
5095; GFX8-NEXT:    s_setpc_b64 s[30:31]
5096;
5097; GFX7-LABEL: extractelement_vgpr_v16i8_idx15:
5098; GFX7:       ; %bb.0:
5099; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5100; GFX7-NEXT:    s_mov_b32 s6, 0
5101; GFX7-NEXT:    s_mov_b32 s7, 0xf000
5102; GFX7-NEXT:    s_mov_b64 s[4:5], 0
5103; GFX7-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
5104; GFX7-NEXT:    s_waitcnt vmcnt(0)
5105; GFX7-NEXT:    v_bfe_u32 v2, v3, 8, 8
5106; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v3
5107; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v3
5108; GFX7-NEXT:    v_bfe_u32 v3, v3, 16, 8
5109; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
5110; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
5111; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
5112; GFX7-NEXT:    v_or_b32_e32 v1, v1, v3
5113; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
5114; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
5115; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
5116; GFX7-NEXT:    s_setpc_b64 s[30:31]
5117;
5118; GFX10-LABEL: extractelement_vgpr_v16i8_idx15:
5119; GFX10:       ; %bb.0:
5120; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5121; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5122; GFX10-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off
5123; GFX10-NEXT:    s_waitcnt vmcnt(0)
5124; GFX10-NEXT:    v_mov_b32_e32 v0, 8
5125; GFX10-NEXT:    v_mov_b32_e32 v1, 16
5126; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
5127; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
5128; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
5129; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v3, v0
5130; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
5131; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
5132; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
5133; GFX10-NEXT:    s_setpc_b64 s[30:31]
5134;
5135; GFX11-LABEL: extractelement_vgpr_v16i8_idx15:
5136; GFX11:       ; %bb.0:
5137; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5138; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5139; GFX11-NEXT:    global_load_b128 v[0:3], v[0:1], off
5140; GFX11-NEXT:    s_waitcnt vmcnt(0)
5141; GFX11-NEXT:    v_bfe_u32 v0, v3, 8, 8
5142; GFX11-NEXT:    v_bfe_u32 v1, v3, 16, 8
5143; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 24, v3
5144; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
5145; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
5146; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
5147; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
5148; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
5149; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v3, v0
5150; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5151; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
5152; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
5153; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: whole-vector load followed by an extract at immediate index 15.
5154  %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
5155  %element = extractelement <16 x i8> %vector, i32 15
5156  ret i8 %element
5157}
5158