1; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -wasm-enable-unimplemented-simd -mattr=+simd128,+sign-ext | FileCheck %s --check-prefixes CHECK,SIMD128
2; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+simd128,+sign-ext | FileCheck %s --check-prefixes CHECK,SIMD128-VM
3; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=-simd128,+sign-ext | FileCheck %s --check-prefixes CHECK,NO-SIMD128
4
5; Test SIMD loads and stores
6
7target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
8target triple = "wasm32-unknown-unknown"
9
10; ==============================================================================
11; 16 x i8
12; ==============================================================================
; Direct load through %p: with simd128 this should be one v128.load at offset 0.
; CHECK-LABEL: load_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
  %val = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %val
}
23
; An `add nuw` of 16 on the integer address should fold into the load's offset
; immediate.
; CHECK-LABEL: load_v16i8_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
  %base = ptrtoint <16 x i8>* %p to i32
  %sum = add nuw i32 %base, 16
  %src = inttoptr i32 %sum to <16 x i8>*
  %val = load <16 x i8>, <16 x i8>* %src
  ret <16 x i8> %val
}
37
; An inbounds GEP by one vector (16 bytes) should fold into the load's offset
; immediate.
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
  %src = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %val = load <16 x i8>, <16 x i8>* %src
  ret <16 x i8> %val
}
49
; A negative GEP index is expected to stay out of the offset immediate: -16 is
; added explicitly and the load uses offset 0.
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
  %src = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %val = load <16 x i8>, <16 x i8>* %src
  ret <16 x i8> %val
}
63
; `add nsw` (no nuw) should not be folded: the 16 is added explicitly and the
; load uses offset 0.
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
  %base = ptrtoint <16 x i8>* %p to i32
  %sum = add nsw i32 %base, 16
  %src = inttoptr i32 %sum to <16 x i8>*
  %val = load <16 x i8>, <16 x i8>* %src
  ret <16 x i8> %val
}
79
; Without `inbounds`, the GEP offset should not be folded: an explicit i32.add
; of 16 feeds a load at offset 0.
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
  %src = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %val = load <16 x i8>, <16 x i8>* %src
  ret <16 x i8> %val
}
93
; Constant address 32 should become the offset immediate on a zero base.
; CHECK-LABEL: load_v16i8_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 32($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_from_numeric_address() {
  %src = inttoptr i32 32 to <16 x i8>*
  %val = load <16 x i8>, <16 x i8>* %src
  ret <16 x i8> %val
}
105
; The global's symbol should become the offset immediate on a zero base.
; CHECK-LABEL: load_v16i8_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, gv_v16i8($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
  %val = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %val
}
117
; Direct store through %p: with simd128 this should be one v128.store at
; offset 0.
; CHECK-LABEL: store_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}}
define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
  store <16 x i8> %v, <16 x i8>* %p
  ret void
}
126
; An `add nuw` of 16 on the integer address should fold into the store's offset
; immediate.
; CHECK-LABEL: store_v16i8_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
  %base = ptrtoint <16 x i8>* %p to i32
  %sum = add nuw i32 %base, 16
  %dst = inttoptr i32 %sum to <16 x i8>*
  store <16 x i8> %v, <16 x i8>* %dst
  ret void
}
138
; An inbounds GEP by one vector (16 bytes) should fold into the store's offset
; immediate.
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
  %dst = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v, <16 x i8>* %dst
  ret void
}
148
; A negative GEP index is expected to stay out of the offset immediate: -16 is
; added explicitly and the store uses offset 0.
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
  %dst = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v, <16 x i8>* %dst
  ret void
}
160
; The add below carries only nsw (no nuw), which does not rule out unsigned
; wrap-around, so the constant is expected to stay out of the store's offset
; immediate: 16 is added explicitly and the store uses offset 0.
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}}
define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}
172
; Without `inbounds`, the GEP offset should not be folded: an explicit i32.add
; of 16 feeds a store at offset 0.
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
  %dst = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v, <16 x i8>* %dst
  ret void
}
184
; Constant address 32 should become the offset immediate on a zero base.
; CHECK-LABEL: store_v16i8_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
  %dst = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v, <16 x i8>* %dst
  ret void
}
195
; The global's symbol should become the offset immediate on a zero base.
; CHECK-LABEL: store_v16i8_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v16i8($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v16i8_to_global_address(<16 x i8> %v) {
  store <16 x i8> %v, <16 x i8>* @gv_v16i8
  ret void
}
205
206; ==============================================================================
207; 8 x i16
208; ==============================================================================
; Direct load through %p: with simd128 this should be one v128.load at offset 0.
; CHECK-LABEL: load_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
  %val = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %val
}
219
; An `add nuw` of 16 on the integer address should fold into the load's offset
; immediate.
; CHECK-LABEL: load_v8i16_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
  %base = ptrtoint <8 x i16>* %p to i32
  %sum = add nuw i32 %base, 16
  %src = inttoptr i32 %sum to <8 x i16>*
  %val = load <8 x i16>, <8 x i16>* %src
  ret <8 x i16> %val
}
233
; An inbounds GEP by one vector (16 bytes) should fold into the load's offset
; immediate.
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
  %src = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %val = load <8 x i16>, <8 x i16>* %src
  ret <8 x i16> %val
}
245
; A negative GEP index is expected to stay out of the offset immediate: -16 is
; added explicitly and the load uses offset 0.
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
  %src = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %val = load <8 x i16>, <8 x i16>* %src
  ret <8 x i16> %val
}
259
; `add nsw` (no nuw) should not be folded: the 16 is added explicitly and the
; load uses offset 0. The load result is captured as R so that it does not
; shadow L0, which already names the constant's register.
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}
275
; Without `inbounds`, the GEP offset should not be folded: an explicit i32.add
; of 16 feeds a load at offset 0.
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
  %src = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %val = load <8 x i16>, <8 x i16>* %src
  ret <8 x i16> %val
}
289
; Constant address 32 should become the offset immediate on a zero base.
; CHECK-LABEL: load_v8i16_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 32($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16_from_numeric_address() {
  %src = inttoptr i32 32 to <8 x i16>*
  %val = load <8 x i16>, <8 x i16>* %src
  ret <8 x i16> %val
}
301
; The global's symbol should become the offset immediate on a zero base.
; CHECK-LABEL: load_v8i16_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, gv_v8i16($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
  %val = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %val
}
313
; Direct store through %p: with simd128 this should be one v128.store at
; offset 0.
; CHECK-LABEL: store_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}}
define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
  store <8 x i16> %v, <8 x i16>* %p
  ret void
}
322
; An `add nuw` of 16 on the integer address should fold into the store's offset
; immediate.
; CHECK-LABEL: store_v8i16_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
  %base = ptrtoint <8 x i16>* %p to i32
  %sum = add nuw i32 %base, 16
  %dst = inttoptr i32 %sum to <8 x i16>*
  store <8 x i16> %v, <8 x i16>* %dst
  ret void
}
334
; An inbounds GEP by one vector (16 bytes) should fold into the store's offset
; immediate.
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
  %dst = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v, <8 x i16>* %dst
  ret void
}
344
; A negative GEP index is expected to stay out of the offset immediate: -16 is
; added explicitly and the store uses offset 0.
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
  %dst = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v, <8 x i16>* %dst
  ret void
}
356
; The add below carries only nsw (no nuw), which does not rule out unsigned
; wrap-around, so the constant is expected to stay out of the store's offset
; immediate: 16 is added explicitly and the store uses offset 0.
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}}
define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}
368
; Without `inbounds`, the GEP offset should not be folded: an explicit i32.add
; of 16 feeds a store at offset 0.
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
  %dst = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v, <8 x i16>* %dst
  ret void
}
380
; Constant address 32 should become the offset immediate on a zero base.
; CHECK-LABEL: store_v8i16_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
  %dst = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v, <8 x i16>* %dst
  ret void
}
391
; The global's symbol should become the offset immediate on a zero base.
; CHECK-LABEL: store_v8i16_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v8i16($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v8i16_to_global_address(<8 x i16> %v) {
  store <8 x i16> %v, <8 x i16>* @gv_v8i16
  ret void
}
401
402; ==============================================================================
403; 4 x i32
404; ==============================================================================
; Direct load through %p: with simd128 this should be one v128.load at offset 0.
; CHECK-LABEL: load_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
  %val = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %val
}
415
; An `add nuw` of 16 on the integer address should fold into the load's offset
; immediate.
; CHECK-LABEL: load_v4i32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
  %base = ptrtoint <4 x i32>* %p to i32
  %sum = add nuw i32 %base, 16
  %src = inttoptr i32 %sum to <4 x i32>*
  %val = load <4 x i32>, <4 x i32>* %src
  ret <4 x i32> %val
}
429
; An inbounds GEP by one vector (16 bytes) should fold into the load's offset
; immediate.
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
  %src = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  %val = load <4 x i32>, <4 x i32>* %src
  ret <4 x i32> %val
}
441
; A negative GEP index is expected to stay out of the offset immediate: -16 is
; added explicitly and the load uses offset 0.
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
  %src = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  %val = load <4 x i32>, <4 x i32>* %src
  ret <4 x i32> %val
}
455
; `add nsw` (no nuw) should not be folded: the 16 is added explicitly and the
; load uses offset 0.
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
  %base = ptrtoint <4 x i32>* %p to i32
  %sum = add nsw i32 %base, 16
  %src = inttoptr i32 %sum to <4 x i32>*
  %val = load <4 x i32>, <4 x i32>* %src
  ret <4 x i32> %val
}
471
; Without `inbounds`, the GEP offset should not be folded: an explicit i32.add
; of 16 feeds a load at offset 0.
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
  %src = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  %val = load <4 x i32>, <4 x i32>* %src
  ret <4 x i32> %val
}
485
; Constant address 32 should become the offset immediate on a zero base.
; CHECK-LABEL: load_v4i32_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 32($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_from_numeric_address() {
  %src = inttoptr i32 32 to <4 x i32>*
  %val = load <4 x i32>, <4 x i32>* %src
  ret <4 x i32> %val
}
497
; The global's symbol should become the offset immediate on a zero base.
; CHECK-LABEL: load_v4i32_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, gv_v4i32($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
define <4 x i32> @load_v4i32_from_global_address() {
  %val = load <4 x i32>, <4 x i32>* @gv_v4i32
  ret <4 x i32> %val
}
509
; Direct store through %p: with simd128 this should be one v128.store at
; offset 0.
; CHECK-LABEL: store_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}}
define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
  store <4 x i32> %v, <4 x i32>* %p
  ret void
}
518
; An `add nuw` of 16 on the integer address should fold into the store's offset
; immediate.
; CHECK-LABEL: store_v4i32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
  %base = ptrtoint <4 x i32>* %p to i32
  %sum = add nuw i32 %base, 16
  %dst = inttoptr i32 %sum to <4 x i32>*
  store <4 x i32> %v, <4 x i32>* %dst
  ret void
}
530
; An inbounds GEP by one vector (16 bytes) should fold into the store's offset
; immediate.
; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
  %dst = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v, <4 x i32>* %dst
  ret void
}
540
; A negative GEP index is expected to stay out of the offset immediate: -16 is
; added explicitly and the store uses offset 0.
; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
  %dst = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  store <4 x i32> %v, <4 x i32>* %dst
  ret void
}
552
; The add below carries only nsw (no nuw), which does not rule out unsigned
; wrap-around, so the constant is expected to stay out of the store's offset
; immediate: 16 is added explicitly and the store uses offset 0.
; CHECK-LABEL: store_v4i32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}}
define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
564
; Without `inbounds`, the GEP offset should not be folded: an explicit i32.add
; of 16 feeds a store at offset 0.
; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
  %dst = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v, <4 x i32>* %dst
  ret void
}
576
; Constant address 32 should become the offset immediate on a zero base.
; CHECK-LABEL: store_v4i32_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
  %dst = inttoptr i32 32 to <4 x i32>*
  store <4 x i32> %v, <4 x i32>* %dst
  ret void
}
587
; The global's symbol should become the offset immediate on a zero base.
; CHECK-LABEL: store_v4i32_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v4i32($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v4i32_to_global_address(<4 x i32> %v) {
  store <4 x i32> %v, <4 x i32>* @gv_v4i32
  ret void
}
597
598; ==============================================================================
599; 2 x i64
600; ==============================================================================
; Direct load through %p: with simd128 this should be one v128.load at offset 0.
; CHECK-LABEL: load_v2i64:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64(<2 x i64>* %p) {
  %val = load <2 x i64>, <2 x i64>* %p
  ret <2 x i64> %val
}
612
; An `add nuw` of 16 on the integer address should fold into the load's offset
; immediate.
; CHECK-LABEL: load_v2i64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
  %base = ptrtoint <2 x i64>* %p to i32
  %sum = add nuw i32 %base, 16
  %src = inttoptr i32 %sum to <2 x i64>*
  %val = load <2 x i64>, <2 x i64>* %src
  ret <2 x i64> %val
}
627
; An inbounds GEP by one vector (16 bytes) should fold into the load's offset
; immediate.
; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
  %src = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  %val = load <2 x i64>, <2 x i64>* %src
  ret <2 x i64> %val
}
640
; A negative GEP index is expected to stay out of the offset immediate: -16 is
; added explicitly and the load uses offset 0.
; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
  %src = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  %val = load <2 x i64>, <2 x i64>* %src
  ret <2 x i64> %val
}
655
; `add nsw` (no nuw) should not be folded: the 16 is added explicitly and the
; load uses offset 0.
; CHECK-LABEL: load_v2i64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
  %base = ptrtoint <2 x i64>* %p to i32
  %sum = add nsw i32 %base, 16
  %src = inttoptr i32 %sum to <2 x i64>*
  %val = load <2 x i64>, <2 x i64>* %src
  ret <2 x i64> %val
}
672
; Without `inbounds`, the GEP offset should not be folded: an explicit i32.add
; of 16 feeds a load at offset 0.
; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
  %src = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  %val = load <2 x i64>, <2 x i64>* %src
  ret <2 x i64> %val
}
687
; Constant address 32 should become the offset immediate on a zero base.
; CHECK-LABEL: load_v2i64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 32($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_from_numeric_address() {
  %src = inttoptr i32 32 to <2 x i64>*
  %val = load <2 x i64>, <2 x i64>* %src
  ret <2 x i64> %val
}
700
; The global's symbol should become the offset immediate on a zero base.
; CHECK-LABEL: load_v2i64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, gv_v2i64($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
define <2 x i64> @load_v2i64_from_global_address() {
  %val = load <2 x i64>, <2 x i64>* @gv_v2i64
  ret <2 x i64> %val
}
713
; Direct store through %p: with simd128 this should be one v128.store at
; offset 0.
; CHECK-LABEL: store_v2i64:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}}
define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
  store <2 x i64> %v, <2 x i64>* %p
  ret void
}
723
; An `add nuw` of 16 on the integer address should fold into the store's offset
; immediate.
; CHECK-LABEL: store_v2i64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
  %base = ptrtoint <2 x i64>* %p to i32
  %sum = add nuw i32 %base, 16
  %dst = inttoptr i32 %sum to <2 x i64>*
  store <2 x i64> %v, <2 x i64>* %dst
  ret void
}
736
; An inbounds GEP by one vector (16 bytes) should fold into the store's offset
; immediate.
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
  %dst = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v, <2 x i64>* %dst
  ret void
}
747
; A negative GEP index is expected to stay out of the offset immediate: -16 is
; added explicitly and the store uses offset 0.
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
  %dst = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  store <2 x i64> %v, <2 x i64>* %dst
  ret void
}
760
; The add below carries only nsw (no nuw), which does not rule out unsigned
; wrap-around, so the constant is expected to stay out of the store's offset
; immediate: 16 is added explicitly and the store uses offset 0.
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}}
define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
773
; Without `inbounds`, the GEP offset should not be folded: an explicit i32.add
; of 16 feeds a store at offset 0.
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
  %dst = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v, <2 x i64>* %dst
  ret void
}
786
; Constant address 32 should become the offset immediate on a zero base.
; CHECK-LABEL: store_v2i64_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
  %dst = inttoptr i32 32 to <2 x i64>*
  store <2 x i64> %v, <2 x i64>* %dst
  ret void
}
798
; The global's symbol should become the offset immediate on a zero base.
; CHECK-LABEL: store_v2i64_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v2i64($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v2i64_to_global_address(<2 x i64> %v) {
  store <2 x i64> %v, <2 x i64>* @gv_v2i64
  ret void
}
809
810; ==============================================================================
811; 4 x float
812; ==============================================================================
; Plain load of <4 x float> through the pointer argument: with simd128
; enabled, a single v128.load at offset 0 is expected.
; CHECK-LABEL: load_v4f32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 0($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <4 x float> @load_v4f32(<4 x float>* %p) {
  %vec = load <4 x float>, <4 x float>* %p
  ret <4 x float> %vec
}
823
; Load of <4 x float> from p + 16 where the add carries nuw: the expected
; output has 16 as the load's offset immediate and no separate add.
; CHECK-LABEL: load_v4f32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
  %base = ptrtoint <4 x float>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %addr
  ret <4 x float> %vec
}
837
; Load of <4 x float> one element past p via an inbounds getelementptr: the
; expected output has 16 as the load's offset immediate.
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
  %next = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %vec = load <4 x float>, <4 x float>* %next
  ret <4 x float> %vec
}
849
; Load of <4 x float> one element before p via an inbounds getelementptr: the
; expected output adds -16 explicitly and loads at offset 0.
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
  %prev = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %vec = load <4 x float>, <4 x float>* %prev
  ret <4 x float> %vec
}
863
; Load of <4 x float> from p + 16 where the add carries only nsw: the expected
; output keeps an explicit i32.add and loads at offset 0.
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
  %base = ptrtoint <4 x float>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %addr
  ret <4 x float> %vec
}
879
; Load of <4 x float> one element past p via a getelementptr without inbounds:
; the expected output adds 16 explicitly and loads at offset 0.
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
  %next = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %vec = load <4 x float>, <4 x float>* %next
  ret <4 x float> %vec
}
893
; Load of <4 x float> from the constant address 32: the expected output uses a
; zero base register with 32 as the load's offset immediate.
; CHECK-LABEL: load_v4f32_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 32($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <4 x float> @load_v4f32_from_numeric_address() {
  %fixed = inttoptr i32 32 to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %fixed
  ret <4 x float> %vec
}
905
; Load of <4 x float> from the global gv_v4f32 (defined below): the expected
; output uses a zero base register with the symbol as the offset immediate.
; CHECK-LABEL: load_v4f32_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, gv_v4f32($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
@gv_v4f32 = global <4 x float> <float 42.0, float 42.0, float 42.0, float 42.0>
define <4 x float> @load_v4f32_from_global_address() {
  %vec = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %vec
}
917
; Plain store of <4 x float> through the pointer argument: with simd128
; enabled, a single v128.store at offset 0 is expected.
; CHECK-LABEL: store_v4f32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}}
define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
  store <4 x float> %v, <4 x float>* %p
  ret void
}
926
; Store of <4 x float> to p + 16 where the add carries nuw: the expected
; output has 16 as the store's offset immediate and no separate add.
; CHECK-LABEL: store_v4f32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
  %base = ptrtoint <4 x float>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x float>*
  store <4 x float> %v, <4 x float>* %addr
  ret void
}
938
; Store of <4 x float> one element past p via an inbounds getelementptr: the
; expected output has 16 as the store's offset immediate.
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
  %next = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v, <4 x float>* %next
  ret void
}
948
; Store of <4 x float> one element before p via an inbounds getelementptr: the
; expected output adds -16 explicitly and stores at offset 0.
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
  %prev = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v, <4 x float>* %prev
  ret void
}
960
; Store of <4 x float> to p + 16 where the address is built with
; ptrtoint/add/inttoptr and the add carries only nsw (no nuw). As in
; load_v4f32_with_unfolded_offset, the expected output keeps an explicit
; i32.add and a zero offset immediate on the store.
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}}
define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v, <4 x float>* %s
  ret void
}
972
; Store of <4 x float> one element past p via a getelementptr without
; inbounds: the expected output adds 16 explicitly and stores at offset 0.
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
  %next = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v, <4 x float>* %next
  ret void
}
984
; Store of <4 x float> to the constant address 32: the expected output uses a
; zero base register with 32 as the store's offset immediate.
; CHECK-LABEL: store_v4f32_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v4f32_to_numeric_address(<4 x float> %v) {
  %fixed = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v, <4 x float>* %fixed
  ret void
}
995
; Store of <4 x float> to the global gv_v4f32: the expected output uses a zero
; base register with the symbol as the store's offset immediate.
; CHECK-LABEL: store_v4f32_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v4f32($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v4f32_to_global_address(<4 x float> %v) {
  store <4 x float> %v, <4 x float>* @gv_v4f32
  ret void
}
1005
1006; ==============================================================================
1007; 2 x double
1008; ==============================================================================
; Plain load of <2 x double> through the pointer argument: in the run that
; enables unimplemented simd, a single v128.load at offset 0 is expected; the
; other two runs expect no v128 at all.
; CHECK-LABEL: load_v2f64:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 0($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <2 x double> @load_v2f64(<2 x double>* %p) {
  %vec = load <2 x double>, <2 x double>* %p
  ret <2 x double> %vec
}
1020
; Load of <2 x double> from p + 16 where the add carries nuw: the expected
; output has 16 as the load's offset immediate and no separate add.
; CHECK-LABEL: load_v2f64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
  %base = ptrtoint <2 x double>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x double>*
  %vec = load <2 x double>, <2 x double>* %addr
  ret <2 x double> %vec
}
1035
; Load of <2 x double> one element past p via an inbounds getelementptr: the
; expected output has 16 as the load's offset immediate.
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 16($0):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
  %next = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %vec = load <2 x double>, <2 x double>* %next
  ret <2 x double> %vec
}
1048
; Load of <2 x double> one element before p via an inbounds getelementptr: the
; expected output adds -16 explicitly and loads at offset 0.
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
  %prev = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %vec = load <2 x double>, <2 x double>* %prev
  ret <2 x double> %vec
}
1063
; Load of <2 x double> from p + 16 where the add carries only nsw: the
; expected output keeps an explicit i32.add and loads at offset 0.
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
  %base = ptrtoint <2 x double>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x double>*
  %vec = load <2 x double>, <2 x double>* %addr
  ret <2 x double> %vec
}
1080
; Load of <2 x double> one element past p via a getelementptr without
; inbounds: the expected output adds 16 explicitly and loads at offset 0.
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param i32{{$}}
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 0($pop[[ADDR]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
  %next = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %vec = load <2 x double>, <2 x double>* %next
  ret <2 x double> %vec
}
1095
; Load of <2 x double> from the constant address 32: the expected output uses
; a zero base register with 32 as the load's offset immediate.
; CHECK-LABEL: load_v2f64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, 32($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
define <2 x double> @load_v2f64_from_numeric_address() {
  %fixed = inttoptr i32 32 to <2 x double>*
  %vec = load <2 x double>, <2 x double>* %fixed
  ret <2 x double> %vec
}
1108
; Load of <2 x double> from the global gv_v2f64 (defined below): the expected
; output uses a zero base register with the symbol as the offset immediate.
; CHECK-LABEL: load_v2f64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .result v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[VEC:[0-9]+]]=, gv_v2f64($pop[[ZERO]]):p2align=0{{$}}
; SIMD128-NEXT: return $pop[[VEC]]{{$}}
@gv_v2f64 = global <2 x double> <double 42.0, double 42.0>
define <2 x double> @load_v2f64_from_global_address() {
  %vec = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %vec
}
1121
; Plain store of <2 x double> through the pointer argument: in the run that
; enables unimplemented simd, a single v128.store at offset 0 is expected; the
; other two runs expect no v128 at all.
; CHECK-LABEL: store_v2f64:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 0($1):p2align=0, $0{{$}}
define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
  store <2 x double> %v, <2 x double>* %p
  ret void
}
1131
; Store of <2 x double> to p + 16 where the add carries nuw: the expected
; output has 16 as the store's offset immediate and no separate add.
; CHECK-LABEL: store_v2f64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
  %base = ptrtoint <2 x double>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x double>*
  store <2 x double> %v, <2 x double>* %addr
  ret void
}
1144
; Store of <2 x double> one element past p via an inbounds getelementptr: the
; expected output has 16 as the store's offset immediate.
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: v128.store 16($1):p2align=0, $0{{$}}
define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
  %next = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v, <2 x double>* %next
  ret void
}
1155
; Store of <2 x double> one element before p via an inbounds getelementptr:
; the expected output adds -16 explicitly and stores at offset 0.
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
  %prev = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v, <2 x double>* %prev
  ret void
}
1168
; Store of <2 x double> to p + 16 where the address is built with
; ptrtoint/add/inttoptr and the add carries only nsw (no nuw). As in
; load_v2f64_with_unfolded_offset, the expected output keeps an explicit
; i32.add and a zero offset immediate on the store.
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]):p2align=0, $0{{$}}
define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v, <2 x double>* %s
  ret void
}
1181
; Store of <2 x double> one element past p via a getelementptr without
; inbounds: the expected output adds 16 explicitly and stores at offset 0.
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128, i32{{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]):p2align=0, $0{{$}}
define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
  %next = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v, <2 x double>* %next
  ret void
}
1194
; Store of <2 x double> to the constant address 32: the expected output uses a
; zero base register with 32 as the store's offset immediate.
; CHECK-LABEL: store_v2f64_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v2f64_to_numeric_address(<2 x double> %v) {
  %fixed = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v, <2 x double>* %fixed
  ret void
}
1206
; Store of <2 x double> to the global gv_v2f64: the expected output uses a
; zero base register with the symbol as the store's offset immediate.
; CHECK-LABEL: store_v2f64_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .param v128{{$}}
; SIMD128-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v2f64($pop[[ZERO]]):p2align=0, $0{{$}}
define void @store_v2f64_to_global_address(<2 x double> %v) {
  store <2 x double> %v, <2 x double>* @gv_v2f64
  ret void
}
1217