; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s

; Load <3 x i32> through an addrspace(3) pointer with no alignment given on the
; load instruction.
; Expected selection per the checks below: gfx900, hawaii and gfx1010 use one
; ds_read_b96; tahiti uses a ds_read_b64 plus a ds_read_b32 whose address is
; the pointer plus 8.
define <3 x i32> @load_lds_v3i32(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b96 v[0:2], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b96 v[0:2], v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v2
; GFX6-NEXT:    ds_read_b64 v[0:1], v0
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_b96 v[0:2], v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr
  ret <3 x i32> %load
}

; Load <3 x i32> through an addrspace(3) pointer with "align 1" on the load.
; Expected selection per the checks below: every target issues twelve
; ds_read_u8 loads and reassembles the three dwords with shifts and ORs
; (v_lshl_or_b32 on gfx900 and gfx1010, separate v_lshlrev_b32 / v_or_b32 on
; hawaii and tahiti). tahiti forms each byte address with v_add_i32 instead of
; using the instruction offset field.
define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_u8 v1, v0
; GFX9-NEXT:    ds_read_u8 v2, v0 offset:1
; GFX9-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX9-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX9-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX9-NEXT:    ds_read_u8 v6, v0 offset:5
; GFX9-NEXT:    ds_read_u8 v7, v0 offset:6
; GFX9-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX9-NEXT:    ds_read_u8 v9, v0 offset:8
; GFX9-NEXT:    ds_read_u8 v10, v0 offset:9
; GFX9-NEXT:    ds_read_u8 v11, v0 offset:10
; GFX9-NEXT:    ds_read_u8 v12, v0 offset:11
; GFX9-NEXT:    s_waitcnt lgkmcnt(10)
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 8, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(8)
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(6)
; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 8, v5
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshl_or_b32 v2, v8, 8, v7
; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v2, v10, 8, v9
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshl_or_b32 v3, v12, 8, v11
; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_u8 v1, v0 offset:6
; GFX7-NEXT:    ds_read_u8 v2, v0 offset:4
; GFX7-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX7-NEXT:    ds_read_u8 v4, v0 offset:1
; GFX7-NEXT:    ds_read_u8 v5, v0
; GFX7-NEXT:    ds_read_u8 v6, v0 offset:3
; GFX7-NEXT:    ds_read_u8 v7, v0 offset:5
; GFX7-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
; GFX7-NEXT:    v_or_b32_e32 v4, v4, v5
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_lshlrev_b32_e32 v5, 8, v6
; GFX7-NEXT:    v_or_b32_e32 v3, v5, v3
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v7
; GFX7-NEXT:    ds_read_u8 v5, v0 offset:11
; GFX7-NEXT:    ds_read_u8 v6, v0 offset:10
; GFX7-NEXT:    ds_read_u8 v7, v0 offset:9
; GFX7-NEXT:    ds_read_u8 v0, v0 offset:8
; GFX7-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(4)
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 8, v8
; GFX7-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v7
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v5
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v6
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT:    v_or_b32_e32 v2, v2, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 5, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 7, v0
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 6, v0
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 9, v0
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 8, v0
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 11, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_u8 v1, v1
; GFX6-NEXT:    ds_read_u8 v2, v2
; GFX6-NEXT:    ds_read_u8 v3, v3
; GFX6-NEXT:    ds_read_u8 v4, v4
; GFX6-NEXT:    ds_read_u8 v5, v5
; GFX6-NEXT:    ds_read_u8 v6, v6
; GFX6-NEXT:    ds_read_u8 v7, v7
; GFX6-NEXT:    ds_read_u8 v8, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(7)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(6)
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(5)
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v3
; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 10, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    ds_read_u8 v4, v4
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v5
; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v6
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 3, v0
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 2, v0
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
; GFX6-NEXT:    ds_read_u8 v5, v5
; GFX6-NEXT:    ds_read_u8 v6, v6
; GFX6-NEXT:    ds_read_u8 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(5)
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v7
; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_or_b32_e32 v2, v3, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(2)
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v5
; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v6
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v8
; GFX6-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_u8 v1, v0
; GFX10-NEXT:    ds_read_u8 v2, v0 offset:1
; GFX10-NEXT:    ds_read_u8 v3, v0 offset:2
; GFX10-NEXT:    ds_read_u8 v4, v0 offset:3
; GFX10-NEXT:    ds_read_u8 v5, v0 offset:4
; GFX10-NEXT:    ds_read_u8 v6, v0 offset:5
; GFX10-NEXT:    ds_read_u8 v7, v0 offset:6
; GFX10-NEXT:    ds_read_u8 v8, v0 offset:7
; GFX10-NEXT:    ds_read_u8 v9, v0 offset:8
; GFX10-NEXT:    ds_read_u8 v10, v0 offset:9
; GFX10-NEXT:    ds_read_u8 v11, v0 offset:10
; GFX10-NEXT:    ds_read_u8 v0, v0 offset:11
; GFX10-NEXT:    s_waitcnt lgkmcnt(10)
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(8)
; GFX10-NEXT:    v_lshl_or_b32 v2, v4, 8, v3
; GFX10-NEXT:    s_waitcnt lgkmcnt(6)
; GFX10-NEXT:    v_lshl_or_b32 v3, v6, 8, v5
; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
; GFX10-NEXT:    v_lshl_or_b32 v4, v8, 8, v7
; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
; GFX10-NEXT:    v_lshl_or_b32 v5, v10, 8, v9
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_lshl_or_b32 v6, v0, 8, v11
; GFX10-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX10-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 1
  ret <3 x i32> %load
}

; Load <3 x i32> through an addrspace(3) pointer with "align 2" on the load.
; Expected selection per the checks below: every target issues six ds_read_u16
; loads and merges each pair into one dword (v_lshl_or_b32 on gfx900 and
; gfx1010, v_lshlrev_b32 plus v_or_b32 on hawaii and tahiti). tahiti forms the
; halfword addresses with v_add_i32 instead of using the offset field.
define <3 x i32> @load_lds_v3i32_align2(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_u16 v1, v0
; GFX9-NEXT:    ds_read_u16 v2, v0 offset:2
; GFX9-NEXT:    ds_read_u16 v3, v0 offset:4
; GFX9-NEXT:    ds_read_u16 v4, v0 offset:6
; GFX9-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX9-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX9-NEXT:    s_waitcnt lgkmcnt(4)
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_u16 v2, v0 offset:8
; GFX7-NEXT:    ds_read_u16 v1, v0 offset:4
; GFX7-NEXT:    ds_read_u16 v3, v0 offset:2
; GFX7-NEXT:    ds_read_u16 v4, v0
; GFX7-NEXT:    ds_read_u16 v5, v0 offset:6
; GFX7-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX7-NEXT:    s_waitcnt lgkmcnt(3)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v3
; GFX7-NEXT:    s_waitcnt lgkmcnt(2)
; GFX7-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX7-NEXT:    s_waitcnt lgkmcnt(1)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
; GFX7-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v6
; GFX7-NEXT:    v_or_b32_e32 v2, v3, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 6, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 10, v0
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 8, v0
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_u16 v1, v1
; GFX6-NEXT:    ds_read_u16 v2, v2
; GFX6-NEXT:    ds_read_u16 v3, v3
; GFX6-NEXT:    ds_read_u16 v4, v4
; GFX6-NEXT:    ds_read_u16 v5, v5
; GFX6-NEXT:    ds_read_u16 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(5)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(4)
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(3)
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX6-NEXT:    s_waitcnt lgkmcnt(1)
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_u16 v1, v0
; GFX10-NEXT:    ds_read_u16 v2, v0 offset:2
; GFX10-NEXT:    ds_read_u16 v3, v0 offset:4
; GFX10-NEXT:    ds_read_u16 v4, v0 offset:6
; GFX10-NEXT:    ds_read_u16 v5, v0 offset:8
; GFX10-NEXT:    ds_read_u16 v6, v0 offset:10
; GFX10-NEXT:    s_waitcnt lgkmcnt(4)
; GFX10-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(2)
; GFX10-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 2
  ret <3 x i32> %load
}

; Load <3 x i32> through an addrspace(3) pointer with "align 4" on the load.
; Expected selection per the checks below: gfx900, hawaii and gfx1010 use a
; ds_read2_b32 (offset1:1) for the first two dwords plus a ds_read_b32 at
; offset 8; tahiti uses three ds_read_b32 loads with addresses formed by
; v_add_i32.
define <3 x i32> @load_lds_v3i32_align4(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, v0
; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX9-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, v0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX7-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    ds_read_b32 v0, v0
; GFX6-NEXT:    ds_read_b32 v1, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 4
  ret <3 x i32> %load
}

; Load <3 x i32> through an addrspace(3) pointer with "align 8" on the load.
; Expected selection per the checks below: gfx900, hawaii and gfx1010 use a
; ds_read2_b32 (offset1:1) plus a ds_read_b32 at offset 8; tahiti uses three
; ds_read_b32 loads with addresses formed by v_add_i32. No 64-bit or 96-bit
; DS read is expected on any of the four targets.
define <3 x i32> @load_lds_v3i32_align8(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, v0
; GFX9-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX9-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, v0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX7-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    ds_read_b32 v0, v0
; GFX6-NEXT:    ds_read_b32 v1, v1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align8:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT:    ds_read_b32 v2, v2 offset:8
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 8
  ret <3 x i32> %load
}

; Load <3 x i32> through an addrspace(3) pointer with "align 16" on the load.
; Expected selection per the checks below: gfx900, hawaii and gfx1010 use one
; ds_read_b96; tahiti uses a ds_read_b64 plus a ds_read_b32 whose address is
; the pointer plus 8.
define <3 x i32> @load_lds_v3i32_align16(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b96 v[0:2], v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    ds_read_b96 v[0:2], v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 8, v2
; GFX6-NEXT:    ds_read_b64 v[0:1], v0
; GFX6-NEXT:    ds_read_b32 v2, v2
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_read_b96 v[0:2], v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 16
  ret <3 x i32> %load
}
