1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
2; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
3; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128
4
5; Test SIMD loads and stores
6
7target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
8target triple = "wasm32-unknown-unknown"
9
10; ==============================================================================
11; 16 x i8
12; ==============================================================================
; Load straight through the pointer argument; the expected offset is 0.
; CHECK-LABEL: load_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
  %vec = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %vec
}
22
; The add is marked nuw, so the 16 is expected to fold into the load's offset.
; CHECK-LABEL: load_v16i8_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
  %int = ptrtoint <16 x i8>* %p to i32
  %sum = add nuw i32 %int, 16
  %ptr = inttoptr i32 %sum to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %ptr
  ret <16 x i8> %vec
}
35
; An inbounds GEP by one vector (16 bytes) is expected to fold into the offset.
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
  %ptr = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %vec = load <16 x i8>, <16 x i8>* %ptr
  ret <16 x i8> %vec
}
46
; A negative GEP index, even with inbounds, is expected to remain an explicit add.
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
  %ptr = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %vec = load <16 x i8>, <16 x i8>* %ptr
  ret <16 x i8> %vec
}
59
; The add is only nsw (not nuw), so the 16 is expected to be added explicitly.
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
  %int = ptrtoint <16 x i8>* %p to i32
  %sum = add nsw i32 %int, 16
  %ptr = inttoptr i32 %sum to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %ptr
  ret <16 x i8> %vec
}
74
; The GEP lacks inbounds, so the 16-byte step is expected as an explicit add.
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
  %ptr = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %vec = load <16 x i8>, <16 x i8>* %ptr
  ret <16 x i8> %vec
}
87
; Constant address 32 is expected as offset 32 applied to a zero base.
; CHECK-LABEL: load_v16i8_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 32($pop[[BASE]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <16 x i8> @load_v16i8_from_numeric_address() {
  %ptr = inttoptr i32 32 to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %ptr
  ret <16 x i8> %vec
}
99
; The global's symbol is expected as the offset, applied to a zero base.
; CHECK-LABEL: load_v16i8_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, gv_v16i8($pop[[BASE]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
  %vec = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %vec
}
111
; Store straight through the pointer argument; the expected offset is 0.
; CHECK-LABEL: store_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
  store <16 x i8> %v, <16 x i8>* %p
  ret void
}
120
; The add is marked nuw, so the 16 is expected to fold into the store's offset.
; CHECK-LABEL: store_v16i8_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
  %int = ptrtoint <16 x i8>* %p to i32
  %sum = add nuw i32 %int, 16
  %ptr = inttoptr i32 %sum to <16 x i8>*
  store <16 x i8> %v, <16 x i8>* %ptr
  ret void
}
132
; An inbounds GEP by one vector (16 bytes) is expected to fold into the offset.
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
  %ptr = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v, <16 x i8>* %ptr
  ret void
}
142
; A negative GEP index, even with inbounds, is expected to remain an explicit add.
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]), $0{{$}}
define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
  %ptr = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v, <16 x i8>* %ptr
  ret void
}
154
; Store counterpart of load_v16i8_with_unfolded_offset: the add is only nsw
; (not nuw), so the 16 is expected to be added explicitly instead of being
; folded into the store's offset. This is distinct from the negative-GEP case.
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}
166
; The GEP lacks inbounds, so the 16-byte step is expected as an explicit add.
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]), $0{{$}}
define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
  %ptr = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v, <16 x i8>* %ptr
  ret void
}
178
; Constant address 32 is expected as offset 32 applied to a zero base.
; CHECK-LABEL: store_v16i8_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[BASE]]), $0{{$}}
define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
  %ptr = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v, <16 x i8>* %ptr
  ret void
}
189
; The global's symbol is expected as the offset, applied to a zero base.
; CHECK-LABEL: store_v16i8_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v16i8($pop[[BASE]]), $0{{$}}
define void @store_v16i8_to_global_address(<16 x i8> %v) {
  store <16 x i8> %v, <16 x i8>* @gv_v16i8
  ret void
}
199
200; ==============================================================================
201; 8 x i16
202; ==============================================================================
; Load straight through the pointer argument; the expected offset is 0.
; CHECK-LABEL: load_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
  %vec = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %vec
}
212
; The add is marked nuw, so the 16 is expected to fold into the load's offset.
; CHECK-LABEL: load_v8i16_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
  %int = ptrtoint <8 x i16>* %p to i32
  %sum = add nuw i32 %int, 16
  %ptr = inttoptr i32 %sum to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %ptr
  ret <8 x i16> %vec
}
225
; An inbounds GEP by one vector (16 bytes) is expected to fold into the offset.
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
  %ptr = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %vec = load <8 x i16>, <8 x i16>* %ptr
  ret <8 x i16> %vec
}
236
; A negative GEP index, even with inbounds, is expected to remain an explicit add.
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
  %ptr = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %vec = load <8 x i16>, <8 x i16>* %ptr
  ret <8 x i16> %vec
}
249
; The add is only nsw (not nuw), so the 16 is expected to be added explicitly.
; The load result gets its own pattern variable (R) rather than redefining L0,
; matching the sibling tests.
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}
264
; The GEP lacks inbounds, so the 16-byte step is expected as an explicit add.
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
  %ptr = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %vec = load <8 x i16>, <8 x i16>* %ptr
  ret <8 x i16> %vec
}
277
; Constant address 32 is expected as offset 32 applied to a zero base.
; CHECK-LABEL: load_v8i16_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 32($pop[[BASE]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <8 x i16> @load_v8i16_from_numeric_address() {
  %ptr = inttoptr i32 32 to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %ptr
  ret <8 x i16> %vec
}
289
; The global's symbol is expected as the offset, applied to a zero base.
; CHECK-LABEL: load_v8i16_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, gv_v8i16($pop[[BASE]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
  %vec = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %vec
}
301
; Store straight through the pointer argument; the expected offset is 0.
; CHECK-LABEL: store_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
  store <8 x i16> %v, <8 x i16>* %p
  ret void
}
310
; The add is marked nuw, so the 16 is expected to fold into the store's offset.
; CHECK-LABEL: store_v8i16_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
  %int = ptrtoint <8 x i16>* %p to i32
  %sum = add nuw i32 %int, 16
  %ptr = inttoptr i32 %sum to <8 x i16>*
  store <8 x i16> %v, <8 x i16>* %ptr
  ret void
}
322
; An inbounds GEP by one vector (16 bytes) is expected to fold into the offset.
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
  %ptr = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v, <8 x i16>* %ptr
  ret void
}
332
; A negative GEP index, even with inbounds, is expected to remain an explicit add.
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]), $0{{$}}
define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
  %ptr = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v, <8 x i16>* %ptr
  ret void
}
344
; Store counterpart of load_v8i16_with_unfolded_offset: the add is only nsw
; (not nuw), so the 16 is expected to be added explicitly instead of being
; folded into the store's offset. This is distinct from the negative-GEP case.
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}
356
; The GEP lacks inbounds, so the 16-byte step is expected as an explicit add.
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]), $0{{$}}
define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
  %ptr = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v, <8 x i16>* %ptr
  ret void
}
368
; Constant address 32 is expected as offset 32 applied to a zero base.
; CHECK-LABEL: store_v8i16_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[BASE]]), $0{{$}}
define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
  %ptr = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v, <8 x i16>* %ptr
  ret void
}
379
; The global's symbol is expected as the offset, applied to a zero base.
; CHECK-LABEL: store_v8i16_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v8i16($pop[[BASE]]), $0{{$}}
define void @store_v8i16_to_global_address(<8 x i16> %v) {
  store <8 x i16> %v, <8 x i16>* @gv_v8i16
  ret void
}
389
390; ==============================================================================
391; 4 x i32
392; ==============================================================================
; Load straight through the pointer argument; the expected offset is 0.
; CHECK-LABEL: load_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
  %vec = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %vec
}
402
; The add is marked nuw, so the 16 is expected to fold into the load's offset.
; CHECK-LABEL: load_v4i32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
  %int = ptrtoint <4 x i32>* %p to i32
  %sum = add nuw i32 %int, 16
  %ptr = inttoptr i32 %sum to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %ptr
  ret <4 x i32> %vec
}
415
; An inbounds GEP by one vector (16 bytes) is expected to fold into the offset.
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
  %ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  %vec = load <4 x i32>, <4 x i32>* %ptr
  ret <4 x i32> %vec
}
426
; A negative GEP index, even with inbounds, is expected to remain an explicit add.
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
  %ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  %vec = load <4 x i32>, <4 x i32>* %ptr
  ret <4 x i32> %vec
}
439
; The add is only nsw (not nuw), so the 16 is expected to be added explicitly.
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
  %int = ptrtoint <4 x i32>* %p to i32
  %sum = add nsw i32 %int, 16
  %ptr = inttoptr i32 %sum to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %ptr
  ret <4 x i32> %vec
}
454
; The GEP lacks inbounds, so the 16-byte step is expected as an explicit add.
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
  %ptr = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  %vec = load <4 x i32>, <4 x i32>* %ptr
  ret <4 x i32> %vec
}
467
; Constant address 32 is expected as offset 32 applied to a zero base.
; CHECK-LABEL: load_v4i32_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 32($pop[[BASE]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <4 x i32> @load_v4i32_from_numeric_address() {
  %ptr = inttoptr i32 32 to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %ptr
  ret <4 x i32> %vec
}
479
; The global's symbol is expected as the offset, applied to a zero base.
; CHECK-LABEL: load_v4i32_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, gv_v4i32($pop[[BASE]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
define <4 x i32> @load_v4i32_from_global_address() {
  %vec = load <4 x i32>, <4 x i32>* @gv_v4i32
  ret <4 x i32> %vec
}
491
; Store straight through the pointer argument; the expected offset is 0.
; CHECK-LABEL: store_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
  store <4 x i32> %v, <4 x i32>* %p
  ret void
}
500
; The add is marked nuw, so the 16 is expected to fold into the store's offset.
; CHECK-LABEL: store_v4i32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
  %int = ptrtoint <4 x i32>* %p to i32
  %sum = add nuw i32 %int, 16
  %ptr = inttoptr i32 %sum to <4 x i32>*
  store <4 x i32> %v, <4 x i32>* %ptr
  ret void
}
512
; An inbounds GEP by one vector (16 bytes) is expected to fold into the offset.
; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
  %ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v, <4 x i32>* %ptr
  ret void
}
522
; A negative GEP index, even with inbounds, is expected to remain an explicit add.
; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]), $0{{$}}
define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
  %ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  store <4 x i32> %v, <4 x i32>* %ptr
  ret void
}
534
; Store counterpart of load_v4i32_with_unfolded_offset: the add is only nsw
; (not nuw), so the 16 is expected to be added explicitly instead of being
; folded into the store's offset. This is distinct from the negative-GEP case.
; CHECK-LABEL: store_v4i32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
546
; The GEP lacks inbounds, so the 16-byte step is expected as an explicit add.
; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]), $0{{$}}
define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
  %ptr = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v, <4 x i32>* %ptr
  ret void
}
558
; Constant address 32 is expected as offset 32 applied to a zero base.
; CHECK-LABEL: store_v4i32_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[BASE]]), $0{{$}}
define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
  %ptr = inttoptr i32 32 to <4 x i32>*
  store <4 x i32> %v, <4 x i32>* %ptr
  ret void
}
569
; The global's symbol is expected as the offset, applied to a zero base.
; CHECK-LABEL: store_v4i32_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v4i32($pop[[BASE]]), $0{{$}}
define void @store_v4i32_to_global_address(<4 x i32> %v) {
  store <4 x i32> %v, <4 x i32>* @gv_v4i32
  ret void
}
579
580; ==============================================================================
581; 2 x i64
582; ==============================================================================
; Load straight through the pointer argument; the expected offset is 0.
; CHECK-LABEL: load_v2i64:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2i64 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64(<2 x i64>* %p) {
  %vec = load <2 x i64>, <2 x i64>* %p
  ret <2 x i64> %vec
}
593
; The add is marked nuw, so the 16 is expected to fold into the load's offset.
; CHECK-LABEL: load_v2i64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
  %int = ptrtoint <2 x i64>* %p to i32
  %sum = add nuw i32 %int, 16
  %ptr = inttoptr i32 %sum to <2 x i64>*
  %vec = load <2 x i64>, <2 x i64>* %ptr
  ret <2 x i64> %vec
}
607
; An inbounds GEP by one vector (16 bytes) is expected to fold into the offset.
; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
  %ptr = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  %vec = load <2 x i64>, <2 x i64>* %ptr
  ret <2 x i64> %vec
}
619
; A negative GEP index, even with inbounds, is expected to remain an explicit add.
; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
  %ptr = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  %vec = load <2 x i64>, <2 x i64>* %ptr
  ret <2 x i64> %vec
}
633
; The add is only nsw (not nuw), so the 16 is expected to be added explicitly.
; CHECK-LABEL: load_v2i64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
  %int = ptrtoint <2 x i64>* %p to i32
  %sum = add nsw i32 %int, 16
  %ptr = inttoptr i32 %sum to <2 x i64>*
  %vec = load <2 x i64>, <2 x i64>* %ptr
  ret <2 x i64> %vec
}
649
; The GEP lacks inbounds, so the 16-byte step is expected as an explicit add.
; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $0, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 0($pop[[ADDR]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
  %ptr = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  %vec = load <2 x i64>, <2 x i64>* %ptr
  ret <2 x i64> %vec
}
663
; Constant address 32 is expected as offset 32 applied to a zero base.
; CHECK-LABEL: load_v2i64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, 32($pop[[BASE]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
define <2 x i64> @load_v2i64_from_numeric_address() {
  %ptr = inttoptr i32 32 to <2 x i64>*
  %vec = load <2 x i64>, <2 x i64>* %ptr
  ret <2 x i64> %vec
}
676
; The global's symbol is expected as the offset, applied to a zero base.
; CHECK-LABEL: load_v2i64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[BASE:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[RES:[0-9]+]]=, gv_v2i64($pop[[BASE]]){{$}}
; SIMD128-NEXT: return $pop[[RES]]{{$}}
@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
define <2 x i64> @load_v2i64_from_global_address() {
  %vec = load <2 x i64>, <2 x i64>* @gv_v2i64
  ret <2 x i64> %vec
}
689
; Store straight through the pointer argument; the expected offset is 0.
; CHECK-LABEL: store_v2i64:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2i64 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
  store <2 x i64> %v, <2 x i64>* %p
  ret void
}
699
; The add is marked nuw, so the 16 is expected to fold into the store's offset.
; CHECK-LABEL: store_v2i64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
  %int = ptrtoint <2 x i64>* %p to i32
  %sum = add nuw i32 %int, 16
  %ptr = inttoptr i32 %sum to <2 x i64>*
  store <2 x i64> %v, <2 x i64>* %ptr
  ret void
}
712
; An inbounds GEP by one vector (16 bytes) is expected to fold into the offset.
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
  %ptr = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v, <2 x i64>* %ptr
  ret void
}
723
; A negative GEP index, even with inbounds, is expected to remain an explicit add.
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[OFF:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[ADDR:[0-9]+]]=, $1, $pop[[OFF]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[ADDR]]), $0{{$}}
define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
  %ptr = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  store <2 x i64> %v, <2 x i64>* %ptr
  ret void
}
736
; Store counterpart of load_v2i64_with_unfolded_offset: the add is only nsw
; (not nuw), so the 16 is expected to be added explicitly instead of being
; folded into the store's offset. This is distinct from the negative-GEP case.
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
749
; A GEP without `inbounds` to the next vector (+16 bytes); the constant is
; expected to stay an explicit i32.add in front of a zero-offset store.
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
  %addr = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v, <2 x i64>* %addr
  ret void
}
762
; Store to the constant address 32; expected as offset 32 applied to a zero
; base operand.
; CHECK-LABEL: store_v2i64_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
  %addr = inttoptr i32 32 to <2 x i64>*
  store <2 x i64> %v, <2 x i64>* %addr
  ret void
}
774
; Store to the global @gv_v2i64; the symbol is expected as the offset applied
; to a zero base operand.
; CHECK-LABEL: store_v2i64_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v2i64($pop[[R]]), $0{{$}}
define void @store_v2i64_to_global_address(<2 x i64> %v) {
  store <2 x i64> %v, <2 x i64>* @gv_v2i64
  ret void
}
785
786; ==============================================================================
787; 4 x float
788; ==============================================================================
; Load straight through the pointer argument; expected with offset 0.
; CHECK-LABEL: load_v4f32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32(<4 x float>* %p) {
  %vec = load <4 x float>, <4 x float>* %p
  ret <4 x float> %vec
}
798
; The pointer is advanced by 16 using `add nuw`; the expected output carries
; that constant as the load's offset immediate.
; CHECK-LABEL: load_v4f32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
  %base = ptrtoint <4 x float>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %addr
  ret <4 x float> %vec
}
811
; An `inbounds` GEP to the next vector (+16 bytes); the expected output carries
; the constant as the load's offset immediate.
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
  %addr = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %vec = load <4 x float>, <4 x float>* %addr
  ret <4 x float> %vec
}
822
; An `inbounds` GEP to the previous vector (-16 bytes); the negative constant is
; expected to remain an explicit i32.add in front of a zero-offset load.
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
  %addr = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %vec = load <4 x float>, <4 x float>* %addr
  ret <4 x float> %vec
}
835
; The pointer is advanced by 16 with `add nsw` only (no `nuw`); the constant is
; expected to stay an explicit i32.add in front of a zero-offset load.
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
  %base = ptrtoint <4 x float>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %addr
  ret <4 x float> %vec
}
850
; A GEP without `inbounds` to the next vector (+16 bytes); the constant is
; expected to stay an explicit i32.add in front of a zero-offset load.
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
  %addr = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %vec = load <4 x float>, <4 x float>* %addr
  ret <4 x float> %vec
}
863
; Load from the constant address 32; expected as offset 32 applied to a zero
; base operand.
; CHECK-LABEL: load_v4f32_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_from_numeric_address() {
  %addr = inttoptr i32 32 to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %addr
  ret <4 x float> %vec
}
875
; Load from the global @gv_v4f32 (defined just below); the symbol is expected
; as the offset applied to a zero base operand.
; CHECK-LABEL: load_v4f32_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4f32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
  %vec = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %vec
}
887
; Store straight through the pointer argument; expected with offset 0.
; CHECK-LABEL: store_v4f32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
  store <4 x float> %v, <4 x float>* %p
  ret void
}
896
; The pointer is advanced by 16 using `add nuw`; the expected output carries
; that constant as the store's offset immediate.
; CHECK-LABEL: store_v4f32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
  %base = ptrtoint <4 x float>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x float>*
  store <4 x float> %v, <4 x float>* %addr
  ret void
}
908
; An `inbounds` GEP to the next vector (+16 bytes); the expected output carries
; the constant as the store's offset immediate.
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
  %addr = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v, <4 x float>* %addr
  ret void
}
918
; An `inbounds` GEP to the previous vector (-16 bytes); the negative constant is
; expected to remain an explicit i32.add in front of a zero-offset store.
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
  %addr = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v, <4 x float>* %addr
  ret void
}
930
; The pointer is advanced by 16 with `add nsw` only (no `nuw`); the constant is
; expected to stay an explicit i32.add in front of a zero-offset store. This is
; the store counterpart of load_v4f32_with_unfolded_offset and is deliberately
; distinct from the negative-GEP case.
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}
942
; A GEP without `inbounds` to the next vector (+16 bytes); the constant is
; expected to stay an explicit i32.add in front of a zero-offset store.
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
  %addr = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v, <4 x float>* %addr
  ret void
}
954
; Store to the constant address 32; expected as offset 32 applied to a zero
; base operand.
; CHECK-LABEL: store_v4f32_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
define void @store_v4f32_to_numeric_address(<4 x float> %v) {
  %addr = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v, <4 x float>* %addr
  ret void
}
965
; Store to the global @gv_v4f32; the symbol is expected as the offset applied
; to a zero base operand.
; CHECK-LABEL: store_v4f32_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v4f32($pop[[R]]), $0{{$}}
define void @store_v4f32_to_global_address(<4 x float> %v) {
  store <4 x float> %v, <4 x float>* @gv_v4f32
  ret void
}
975
976; ==============================================================================
977; 2 x double
978; ==============================================================================
; Load straight through the pointer argument; expected with offset 0.
; CHECK-LABEL: load_v2f64:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2f64 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64(<2 x double>* %p) {
  %vec = load <2 x double>, <2 x double>* %p
  ret <2 x double> %vec
}
989
; The pointer is advanced by 16 using `add nuw`; the expected output carries
; that constant as the load's offset immediate.
; CHECK-LABEL: load_v2f64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
  %base = ptrtoint <2 x double>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x double>*
  %vec = load <2 x double>, <2 x double>* %addr
  ret <2 x double> %vec
}
1003
; An `inbounds` GEP to the next vector (+16 bytes); the expected output carries
; the constant as the load's offset immediate.
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
  %addr = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %vec = load <2 x double>, <2 x double>* %addr
  ret <2 x double> %vec
}
1015
; An `inbounds` GEP to the previous vector (-16 bytes); the negative constant is
; expected to remain an explicit i32.add in front of a zero-offset load.
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
  %addr = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %vec = load <2 x double>, <2 x double>* %addr
  ret <2 x double> %vec
}
1029
; The pointer is advanced by 16 with `add nsw` only (no `nuw`); the constant is
; expected to stay an explicit i32.add in front of a zero-offset load.
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
  %base = ptrtoint <2 x double>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x double>*
  %vec = load <2 x double>, <2 x double>* %addr
  ret <2 x double> %vec
}
1045
; A GEP without `inbounds` to the next vector (+16 bytes); the constant is
; expected to stay an explicit i32.add in front of a zero-offset load.
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
  %addr = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %vec = load <2 x double>, <2 x double>* %addr
  ret <2 x double> %vec
}
1059
; Load from the constant address 32; expected as offset 32 applied to a zero
; base operand.
; CHECK-LABEL: load_v2f64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_from_numeric_address() {
  %addr = inttoptr i32 32 to <2 x double>*
  %vec = load <2 x double>, <2 x double>* %addr
  ret <2 x double> %vec
}
1072
; Load from the global @gv_v2f64 (defined just below); the symbol is expected
; as the offset applied to a zero base operand.
; CHECK-LABEL: load_v2f64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2f64($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
  %vec = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %vec
}
1085
; Store straight through the pointer argument; expected with offset 0.
; CHECK-LABEL: store_v2f64:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2f64 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
  store <2 x double> %v, <2 x double>* %p
  ret void
}
1095
; The pointer is advanced by 16 using `add nuw`; the expected output carries
; that constant as the store's offset immediate.
; CHECK-LABEL: store_v2f64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
  %base = ptrtoint <2 x double>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x double>*
  store <2 x double> %v, <2 x double>* %addr
  ret void
}
1108
; An `inbounds` GEP to the next vector (+16 bytes); the expected output carries
; the constant as the store's offset immediate.
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
  %addr = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v, <2 x double>* %addr
  ret void
}
1119
; An `inbounds` GEP to the previous vector (-16 bytes); the negative constant is
; expected to remain an explicit i32.add in front of a zero-offset store.
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
  %addr = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v, <2 x double>* %addr
  ret void
}
1132
; The pointer is advanced by 16 with `add nsw` only (no `nuw`); the constant is
; expected to stay an explicit i32.add in front of a zero-offset store. This is
; the store counterpart of load_v2f64_with_unfolded_offset and is deliberately
; distinct from the negative-GEP case.
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}
1145
; A GEP without `inbounds` to the next vector (+16 bytes); the constant is
; expected to stay an explicit i32.add in front of a zero-offset store.
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
  %addr = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v, <2 x double>* %addr
  ret void
}
1158
; Store to the constant address 32; expected as offset 32 applied to a zero
; base operand.
; CHECK-LABEL: store_v2f64_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
define void @store_v2f64_to_numeric_address(<2 x double> %v) {
  %addr = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v, <2 x double>* %addr
  ret void
}
1170
; Store to the global @gv_v2f64; the symbol is expected as the offset applied
; to a zero base operand.
; CHECK-LABEL: store_v2f64_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v2f64($pop[[R]]), $0{{$}}
define void @store_v2f64_to_global_address(<2 x double> %v) {
  store <2 x double> %v, <2 x double>* @gv_v2f64
  ret void
}
1181