1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
2; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128
3
4; Test SIMD loads and stores
5
6target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
7target triple = "wasm32-unknown-unknown"
8
9; ==============================================================================
10; 16 x i8
11; ==============================================================================
12; CHECK-LABEL: load_v16i8:
13; NO-SIMD128-NOT: v128
14; SIMD128-NEXT: .functype load_v16i8 (i32) -> (v128){{$}}
15; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
16; SIMD128-NEXT: return $pop[[R]]{{$}}
17define <16 x i8> @load_v16i8(<16 x i8>* %p) {
18  %v = load <16 x i8>, <16 x i8>* %p
19  ret <16 x i8> %v
20}
21
22; CHECK-LABEL: load_splat_v16i8:
23; NO-SIMD128-NOT: v128
24; SIMD128-NEXT: .functype load_splat_v16i8 (i32) -> (v128){{$}}
25; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
26; SIMD128-NEXT: return $pop[[R]]{{$}}
27define <16 x i8> @load_splat_v16i8(i8* %p) {
28  %e = load i8, i8* %p
29  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
30  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
31  ret <16 x i8> %v2
32}
33
34; CHECK-LABEL: load_v16i8_with_folded_offset:
35; NO-SIMD128-NOT: v128
36; SIMD128-NEXT: .functype load_v16i8_with_folded_offset (i32) -> (v128){{$}}
37; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
38; SIMD128-NEXT: return $pop[[R]]{{$}}
39define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
40  %q = ptrtoint <16 x i8>* %p to i32
41  %r = add nuw i32 %q, 16
42  %s = inttoptr i32 %r to <16 x i8>*
43  %v = load <16 x i8>, <16 x i8>* %s
44  ret <16 x i8> %v
45}
46
47; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
48; NO-SIMD128-NOT: v128
49; SIMD128-NEXT: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128){{$}}
50; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
51; SIMD128-NEXT: return $pop[[R]]{{$}}
52define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
53  %q = ptrtoint i8* %p to i32
54  %r = add nuw i32 %q, 16
55  %s = inttoptr i32 %r to i8*
56  %e = load i8, i8* %s
57  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
58  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
59  ret <16 x i8> %v2
60}
61
62; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
63; NO-SIMD128-NOT: v128
64; SIMD128-NEXT: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128){{$}}
65; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
66; SIMD128-NEXT: return $pop[[R]]{{$}}
67define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
68  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
69  %v = load <16 x i8>, <16 x i8>* %s
70  ret <16 x i8> %v
71}
72
73; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
74; NO-SIMD128-NOT: v128
75; SIMD128-NEXT: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128){{$}}
76; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 1($0){{$}}
77; SIMD128-NEXT: return $pop[[R]]{{$}}
78define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
79  %s = getelementptr inbounds i8, i8* %p, i32 1
80  %e = load i8, i8* %s
81  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
82  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
83  ret <16 x i8> %v2
84}
85
86; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
87; NO-SIMD128-NOT: v128
88; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
89; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
90; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
91; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
92; SIMD128-NEXT: return $pop[[R]]{{$}}
93define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
94  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
95  %v = load <16 x i8>, <16 x i8>* %s
96  ret <16 x i8> %v
97}
98
99; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
100; NO-SIMD128-NOT: v128
101; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
102; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
103; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
104; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
105; SIMD128-NEXT: return $pop[[R]]{{$}}
106define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
107  %s = getelementptr inbounds i8, i8* %p, i32 -1
108  %e = load i8, i8* %s
109  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
110  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
111  ret <16 x i8> %v2
112}
113
114; CHECK-LABEL: load_v16i8_with_unfolded_offset:
115; NO-SIMD128-NOT: v128
116; SIMD128-NEXT: .functype load_v16i8_with_unfolded_offset (i32) -> (v128){{$}}
117; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
118; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
119; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
120; SIMD128-NEXT: return $pop[[R]]{{$}}
121define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
122  %q = ptrtoint <16 x i8>* %p to i32
123  %r = add nsw i32 %q, 16
124  %s = inttoptr i32 %r to <16 x i8>*
125  %v = load <16 x i8>, <16 x i8>* %s
126  ret <16 x i8> %v
127}
128
129; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
130; NO-SIMD128-NOT: v128
131; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128){{$}}
132; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
133; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
134; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
135; SIMD128-NEXT: return $pop[[R]]{{$}}
136define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
137  %q = ptrtoint i8* %p to i32
138  %r = add nsw i32 %q, 16
139  %s = inttoptr i32 %r to i8*
140  %e = load i8, i8* %s
141  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
142  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
143  ret <16 x i8> %v2
144}
145
146; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
147; NO-SIMD128-NOT: v128
148; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}}
149; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
150; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
151; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
152; SIMD128-NEXT: return $pop[[R]]{{$}}
153define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
154  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
155  %v = load <16 x i8>, <16 x i8>* %s
156  ret <16 x i8> %v
157}
158
159; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
160; NO-SIMD128-NOT: v128
161; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}}
162; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 1{{$}}
163; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
164; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
165; SIMD128-NEXT: return $pop[[R]]{{$}}
166define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
167  %s = getelementptr i8, i8* %p, i32 1
168  %e = load i8, i8* %s
169  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
170  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
171  ret <16 x i8> %v2
172}
173
174; CHECK-LABEL: load_v16i8_from_numeric_address:
175; NO-SIMD128-NOT: v128
176; SIMD128-NEXT: .functype load_v16i8_from_numeric_address () -> (v128){{$}}
177; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
178; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
179; SIMD128-NEXT: return $pop[[R]]{{$}}
180define <16 x i8> @load_v16i8_from_numeric_address() {
181  %s = inttoptr i32 32 to <16 x i8>*
182  %v = load <16 x i8>, <16 x i8>* %s
183  ret <16 x i8> %v
184}
185
186; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
187; NO-SIMD128-NOT: v128
188; SIMD128-NEXT: .functype load_splat_v16i8_from_numeric_address () -> (v128){{$}}
189; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
190; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
191; SIMD128-NEXT: return $pop[[R]]{{$}}
192define <16 x i8> @load_splat_v16i8_from_numeric_address() {
193  %s = inttoptr i32 32 to i8*
194  %e = load i8, i8* %s
195  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
196  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
197  ret <16 x i8> %v2
198}
199
200; CHECK-LABEL: load_v16i8_from_global_address:
201; NO-SIMD128-NOT: v128
202; SIMD128-NEXT: .functype load_v16i8_from_global_address () -> (v128){{$}}
203; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
204; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v16i8($pop[[L0]]){{$}}
205; SIMD128-NEXT: return $pop[[R]]{{$}}
206@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
207define <16 x i8> @load_v16i8_from_global_address() {
208  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
209  ret <16 x i8> %v
210}
211
212; CHECK-LABEL: load_splat_v16i8_from_global_address:
213; NO-SIMD128-NOT: v128
214; SIMD128-NEXT: .functype load_splat_v16i8_from_global_address () -> (v128){{$}}
215; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
216; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, gv_i8($pop[[L0]]){{$}}
217; SIMD128-NEXT: return $pop[[R]]{{$}}
218@gv_i8 = global i8 42
219define <16 x i8> @load_splat_v16i8_from_global_address() {
220  %e = load i8, i8* @gv_i8
221  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
222  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
223  ret <16 x i8> %v2
224}
225
226; CHECK-LABEL: store_v16i8:
227; NO-SIMD128-NOT: v128
228; SIMD128-NEXT: .functype store_v16i8 (v128, i32) -> (){{$}}
229; SIMD128-NEXT: v128.store 0($1), $0{{$}}
230define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
231  store <16 x i8> %v , <16 x i8>* %p
232  ret void
233}
234
235; CHECK-LABEL: store_v16i8_with_folded_offset:
236; NO-SIMD128-NOT: v128
237; SIMD128-NEXT: .functype store_v16i8_with_folded_offset (v128, i32) -> (){{$}}
238; SIMD128-NEXT: v128.store 16($1), $0{{$}}
239define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
240  %q = ptrtoint <16 x i8>* %p to i32
241  %r = add nuw i32 %q, 16
242  %s = inttoptr i32 %r to <16 x i8>*
243  store <16 x i8> %v , <16 x i8>* %s
244  ret void
245}
246
247; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
248; NO-SIMD128-NOT: v128
249; SIMD128-NEXT: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> (){{$}}
250; SIMD128-NEXT: v128.store 16($1), $0{{$}}
251define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
252  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
253  store <16 x i8> %v , <16 x i8>* %s
254  ret void
255}
256
257; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
258; NO-SIMD128-NOT: v128
259; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
260; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
261; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
262; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
263define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
264  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
265  store <16 x i8> %v , <16 x i8>* %s
266  ret void
267}
268
269; CHECK-LABEL: store_v16i8_with_unfolded_offset:
270; NO-SIMD128-NOT: v128
271; SIMD128-NEXT: .functype store_v16i8_with_unfolded_offset (v128, i32) -> (){{$}}
272; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
273; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
274; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
275define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
276  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
277  store <16 x i8> %v , <16 x i8>* %s
278  ret void
279}
280
281; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
282; NO-SIMD128-NOT: v128
283; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> (){{$}}
284; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
285; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
286; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
287define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
288  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
289  store <16 x i8> %v , <16 x i8>* %s
290  ret void
291}
292
293; CHECK-LABEL: store_v16i8_to_numeric_address:
294; NO-SIMD128-NOT: v128
295; SIMD128-NEXT: .functype store_v16i8_to_numeric_address (v128) -> (){{$}}
296; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
297; SIMD128-NEXT: v128.store 32($pop[[R]]), $0{{$}}
298define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
299  %s = inttoptr i32 32 to <16 x i8>*
300  store <16 x i8> %v , <16 x i8>* %s
301  ret void
302}
303
304; CHECK-LABEL: store_v16i8_to_global_address:
305; NO-SIMD128-NOT: v128
306; SIMD128-NEXT: .functype store_v16i8_to_global_address (v128) -> (){{$}}
307; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
308; SIMD128-NEXT: v128.store gv_v16i8($pop[[R]]), $0{{$}}
309define void @store_v16i8_to_global_address(<16 x i8> %v) {
310  store <16 x i8> %v , <16 x i8>* @gv_v16i8
311  ret void
312}
313
314; ==============================================================================
315; 8 x i16
316; ==============================================================================
317; CHECK-LABEL: load_v8i16:
318; NO-SIMD128-NOT: v128
319; SIMD128-NEXT: .functype load_v8i16 (i32) -> (v128){{$}}
320; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
321; SIMD128-NEXT: return $pop[[R]]{{$}}
322define <8 x i16> @load_v8i16(<8 x i16>* %p) {
323  %v = load <8 x i16>, <8 x i16>* %p
324  ret <8 x i16> %v
325}
326
327; CHECK-LABEL: load_splat_v8i16:
328; NO-SIMD128-NOT: v128
329; SIMD128-NEXT: .functype load_splat_v8i16 (i32) -> (v128){{$}}
330; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
331; SIMD128-NEXT: return $pop[[R]]{{$}}
332define <8 x i16> @load_splat_v8i16(i16* %p) {
333  %e = load i16, i16* %p
334  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
335  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
336  ret <8 x i16> %v2
337}
338
339; CHECK-LABEL: load_sext_v8i16:
340; NO-SIMD128-NOT: v128
341; SIMD128-NEXT: .functype load_sext_v8i16 (i32) -> (v128){{$}}
342; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($0){{$}}
343; SIMD128-NEXT: return $pop[[R]]{{$}}
344define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
345  %v = load <8 x i8>, <8 x i8>* %p
346  %v2 = sext <8 x i8> %v to <8 x i16>
347  ret <8 x i16> %v2
348}
349
350; CHECK-LABEL: load_zext_v8i16:
351; NO-SIMD128-NOT: v128
352; SIMD128-NEXT: .functype load_zext_v8i16 (i32) -> (v128){{$}}
353; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($0){{$}}
354; SIMD128-NEXT: return $pop[[R]]{{$}}
355define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
356  %v = load <8 x i8>, <8 x i8>* %p
357  %v2 = zext <8 x i8> %v to <8 x i16>
358  ret <8 x i16> %v2
359}
360
361; CHECK-LABEL: load_ext_v8i16:
362; NO-SIMD128-NOT: load8x8
363; SIMD128-NEXT: .functype load_ext_v8i16 (i32) -> (v128){{$}}
364; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($0){{$}}
365; SIMD128-NEXT: return $pop[[R]]{{$}}
366define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
367  %v = load <8 x i8>, <8 x i8>* %p
368  ret <8 x i8> %v
369}
370
371; CHECK-LABEL: load_v8i16_with_folded_offset:
372; NO-SIMD128-NOT: v128
373; SIMD128-NEXT: .functype load_v8i16_with_folded_offset (i32) -> (v128){{$}}
374; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
375; SIMD128-NEXT: return $pop[[R]]{{$}}
376define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
377  %q = ptrtoint <8 x i16>* %p to i32
378  %r = add nuw i32 %q, 16
379  %s = inttoptr i32 %r to <8 x i16>*
380  %v = load <8 x i16>, <8 x i16>* %s
381  ret <8 x i16> %v
382}
383
384; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
385; NO-SIMD128-NOT: v128
386; SIMD128-NEXT: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128){{$}}
387; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
388; SIMD128-NEXT: return $pop[[R]]{{$}}
389define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
390  %q = ptrtoint i16* %p to i32
391  %r = add nuw i32 %q, 16
392  %s = inttoptr i32 %r to i16*
393  %e = load i16, i16* %s
394  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
395  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
396  ret <8 x i16> %v2
397}
398
399; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
400; NO-SIMD128-NOT: v128
401; SIMD128-NEXT: .functype load_sext_v8i16_with_folded_offset (i32) -> (v128){{$}}
402; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 16($0){{$}}
403; SIMD128-NEXT: return $pop[[R]]{{$}}
404define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
405  %q = ptrtoint <8 x i8>* %p to i32
406  %r = add nuw i32 %q, 16
407  %s = inttoptr i32 %r to <8 x i8>*
408  %v = load <8 x i8>, <8 x i8>* %s
409  %v2 = sext <8 x i8> %v to <8 x i16>
410  ret <8 x i16> %v2
411}
412
413; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
414; NO-SIMD128-NOT: v128
415; SIMD128-NEXT: .functype load_zext_v8i16_with_folded_offset (i32) -> (v128){{$}}
416; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 16($0){{$}}
417; SIMD128-NEXT: return $pop[[R]]{{$}}
418define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
419  %q = ptrtoint <8 x i8>* %p to i32
420  %r = add nuw i32 %q, 16
421  %s = inttoptr i32 %r to <8 x i8>*
422  %v = load <8 x i8>, <8 x i8>* %s
423  %v2 = zext <8 x i8> %v to <8 x i16>
424  ret <8 x i16> %v2
425}
426
427; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
428; NO-SIMD128-NOT: load8x8
429; SIMD128-NEXT: .functype load_ext_v8i16_with_folded_offset (i32) -> (v128){{$}}
430; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 16($0){{$}}
431; SIMD128-NEXT: return $pop[[R]]{{$}}
432define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
433  %q = ptrtoint <8 x i8>* %p to i32
434  %r = add nuw i32 %q, 16
435  %s = inttoptr i32 %r to <8 x i8>*
436  %v = load <8 x i8>, <8 x i8>* %s
437  ret <8 x i8> %v
438}
439
440; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
441; NO-SIMD128-NOT: v128
442; SIMD128-NEXT: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
443; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
444; SIMD128-NEXT: return $pop[[R]]{{$}}
445define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
446  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
447  %v = load <8 x i16>, <8 x i16>* %s
448  ret <8 x i16> %v
449}
450
451; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
452; NO-SIMD128-NOT: v128
453; SIMD128-NEXT: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
454; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 2($0){{$}}
455; SIMD128-NEXT: return $pop[[R]]{{$}}
456define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
457  %s = getelementptr inbounds i16, i16* %p, i32 1
458  %e = load i16, i16* %s
459  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
460  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
461  ret <8 x i16> %v2
462}
463
464; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
465; NO-SIMD128-NOT: v128
466; SIMD128-NEXT: .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
467; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 8($0){{$}}
468; SIMD128-NEXT: return $pop[[R]]{{$}}
469define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
470  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
471  %v = load <8 x i8>, <8 x i8>* %s
472  %v2 = sext <8 x i8> %v to <8 x i16>
473  ret <8 x i16> %v2
474}
475
476; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
477; NO-SIMD128-NOT: v128
478; SIMD128-NEXT: .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
479; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 8($0){{$}}
480; SIMD128-NEXT: return $pop[[R]]{{$}}
481define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
482  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
483  %v = load <8 x i8>, <8 x i8>* %s
484  %v2 = zext <8 x i8> %v to <8 x i16>
485  ret <8 x i16> %v2
486}
487
488; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
489; NO-SIMD128-NOT: load8x8
490; SIMD128-NEXT: .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
491; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 8($0){{$}}
492; SIMD128-NEXT: return $pop[[R]]{{$}}
493define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
494  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
495  %v = load <8 x i8>, <8 x i8>* %s
496  ret <8 x i8> %v
497}
498
499; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
500; NO-SIMD128-NOT: v128
501; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
502; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
503; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
504; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
505; SIMD128-NEXT: return $pop[[R]]{{$}}
506define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
507  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
508  %v = load <8 x i16>, <8 x i16>* %s
509  ret <8 x i16> %v
510}
511
512; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
513; NO-SIMD128-NOT: v128
514; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
515; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -2{{$}}
516; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
517; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
518; SIMD128-NEXT: return $pop[[R]]{{$}}
519define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
520  %s = getelementptr inbounds i16, i16* %p, i32 -1
521  %e = load i16, i16* %s
522  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
523  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
524  ret <8 x i16> %v2
525}
526
527; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
528; NO-SIMD128-NOT: v128
529; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
530; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
531; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
532; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
533; SIMD128-NEXT: return $pop[[R]]{{$}}
534define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
535  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
536  %v = load <8 x i8>, <8 x i8>* %s
537  %v2 = sext <8 x i8> %v to <8 x i16>
538  ret <8 x i16> %v2
539}
540
541; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
542; NO-SIMD128-NOT: v128
543; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
544; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
545; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
546; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
547; SIMD128-NEXT: return $pop[[R]]{{$}}
548define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
549  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
550  %v = load <8 x i8>, <8 x i8>* %s
551  %v2 = zext <8 x i8> %v to <8 x i16>
552  ret <8 x i16> %v2
553}
554
555; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
556; NO-SIMD128-NOT: load8x8
557; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
558; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
559; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
560; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
561; SIMD128-NEXT: return $pop[[R]]{{$}}
562define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
563  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
564  %v = load <8 x i8>, <8 x i8>* %s
565  ret <8 x i8> %v
566}
567
568; CHECK-LABEL: load_v8i16_with_unfolded_offset:
569; NO-SIMD128-NOT: v128
570; SIMD128-NEXT: .functype load_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
571; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
572; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
573; SIMD128-NEXT: v128.load $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}}
574; SIMD128-NEXT: return $pop[[L0]]{{$}}
575define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
576  %q = ptrtoint <8 x i16>* %p to i32
577  %r = add nsw i32 %q, 16
578  %s = inttoptr i32 %r to <8 x i16>*
579  %v = load <8 x i16>, <8 x i16>* %s
580  ret <8 x i16> %v
581}
582
583; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
584; NO-SIMD128-NOT: v128
585; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
586; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
587; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
588; SIMD128-NEXT: v16x8.load_splat $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}}
589; SIMD128-NEXT: return $pop[[L0]]{{$}}
590define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
591  %q = ptrtoint i16* %p to i32
592  %r = add nsw i32 %q, 16
593  %s = inttoptr i32 %r to i16*
594  %e = load i16, i16* %s
595  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
596  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
597  ret <8 x i16> %v2
598}
599
600; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
601; NO-SIMD128-NOT: v128
602; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
603; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
604; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
605; SIMD128-NEXT: i16x8.load8x8_s $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}}
606; SIMD128-NEXT: return $pop[[L0]]{{$}}
607define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
608  %q = ptrtoint <8 x i8>* %p to i32
609  %r = add nsw i32 %q, 16
610  %s = inttoptr i32 %r to <8 x i8>*
611  %v = load <8 x i8>, <8 x i8>* %s
612  %v2 = sext <8 x i8> %v to <8 x i16>
613  ret <8 x i16> %v2
614}
615
616; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
617; NO-SIMD128-NOT: v128
618; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
619; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
620; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
621; SIMD128-NEXT: i16x8.load8x8_u $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}}
622; SIMD128-NEXT: return $pop[[L0]]{{$}}
623define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
624  %q = ptrtoint <8 x i8>* %p to i32
625  %r = add nsw i32 %q, 16
626  %s = inttoptr i32 %r to <8 x i8>*
627  %v = load <8 x i8>, <8 x i8>* %s
628  %v2 = zext <8 x i8> %v to <8 x i16>
629  ret <8 x i16> %v2
630}
631
632; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
633; NO-SIMD128-NOT: load8x8
634; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
635; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
636; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
637; SIMD128-NEXT: i16x8.load8x8_u $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}}
638; SIMD128-NEXT: return $pop[[L0]]{{$}}
639define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
640  %q = ptrtoint <8 x i8>* %p to i32
641  %r = add nsw i32 %q, 16
642  %s = inttoptr i32 %r to <8 x i8>*
643  %v = load <8 x i8>, <8 x i8>* %s
644  ret <8 x i8> %v
645}
646
647; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
648; NO-SIMD128-NOT: v128
649; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
650; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
651; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
652; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
653; SIMD128-NEXT: return $pop[[R]]{{$}}
654define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
655  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
656  %v = load <8 x i16>, <8 x i16>* %s
657  ret <8 x i16> %v
658}
659
660; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
661; NO-SIMD128-NOT: v128
662; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
663; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 2{{$}}
664; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
665; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
666; SIMD128-NEXT: return $pop[[R]]{{$}}
667define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
668  %s = getelementptr i16, i16* %p, i32 1
669  %e = load i16, i16* %s
670  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
671  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
672  ret <8 x i16> %v2
673}
674
675; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
676; NO-SIMD128-NOT: v128
677; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
678; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
679; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
680; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
681; SIMD128-NEXT: return $pop[[R]]{{$}}
682define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
683  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
684  %v = load <8 x i8>, <8 x i8>* %s
685  %v2 = sext <8 x i8> %v to <8 x i16>
686  ret <8 x i16> %v2
687}
688
689; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
690; NO-SIMD128-NOT: v128
691; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
692; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
693; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
694; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
695; SIMD128-NEXT: return $pop[[R]]{{$}}
696define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
697  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
698  %v = load <8 x i8>, <8 x i8>* %s
699  %v2 = zext <8 x i8> %v to <8 x i16>
700  ret <8 x i16> %v2
701}
702
703; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
704; NO-SIMD128-NOT: load8x8
705; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
706; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
707; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
708; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
709; SIMD128-NEXT: return $pop[[R]]{{$}}
710define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
711  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
712  %v = load <8 x i8>, <8 x i8>* %s
713  ret <8 x i8> %v
714}
715
716; CHECK-LABEL: load_v8i16_from_numeric_address:
717; NO-SIMD128-NOT: v128
718; SIMD128-NEXT: .functype load_v8i16_from_numeric_address () -> (v128){{$}}
719; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
720; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
721; SIMD128-NEXT: return $pop[[R]]{{$}}
722define <8 x i16> @load_v8i16_from_numeric_address() {
723  %s = inttoptr i32 32 to <8 x i16>*
724  %v = load <8 x i16>, <8 x i16>* %s
725  ret <8 x i16> %v
726}
727
728; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
729; NO-SIMD128-NOT: v128
730; SIMD128-NEXT: .functype load_splat_v8i16_from_numeric_address () -> (v128){{$}}
731; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
732; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
733; SIMD128-NEXT: return $pop[[R]]{{$}}
734define <8 x i16> @load_splat_v8i16_from_numeric_address() {
735  %s = inttoptr i32 32 to i16*
736  %e = load i16, i16* %s
737  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
738  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
739  ret <8 x i16> %v2
740}
741
742; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
743; NO-SIMD128-NOT: v128
744; SIMD128-NEXT: .functype load_sext_v8i16_from_numeric_address () -> (v128){{$}}
745; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
746; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
747; SIMD128-NEXT: return $pop[[R]]{{$}}
748define <8 x i16> @load_sext_v8i16_from_numeric_address() {
749  %s = inttoptr i32 32 to <8 x i8>*
750  %v = load <8 x i8>, <8 x i8>* %s
751  %v2 = sext <8 x i8> %v to <8 x i16>
752  ret <8 x i16> %v2
753}
754
755; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
756; NO-SIMD128-NOT: v128
757; SIMD128-NEXT: .functype load_zext_v8i16_from_numeric_address () -> (v128){{$}}
758; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
759; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
760; SIMD128-NEXT: return $pop[[R]]{{$}}
761define <8 x i16> @load_zext_v8i16_from_numeric_address() {
762  %s = inttoptr i32 32 to <8 x i8>*
763  %v = load <8 x i8>, <8 x i8>* %s
764  %v2 = zext <8 x i8> %v to <8 x i16>
765  ret <8 x i16> %v2
766}
767
768; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
769; NO-SIMD128-NOT: load8x8
770; SIMD128-NEXT: .functype load_ext_v8i16_from_numeric_address () -> (v128){{$}}
771; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
772; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
773; SIMD128-NEXT: return $pop[[R]]{{$}}
774define <8 x i8> @load_ext_v8i16_from_numeric_address() {
775  %s = inttoptr i32 32 to <8 x i8>*
776  %v = load <8 x i8>, <8 x i8>* %s
777  ret <8 x i8> %v
778}
779
780; CHECK-LABEL: load_v8i16_from_global_address:
781; NO-SIMD128-NOT: v128
782; SIMD128-NEXT: .functype load_v8i16_from_global_address () -> (v128){{$}}
783; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
784; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v8i16($pop[[L0]]){{$}}
785; SIMD128-NEXT: return $pop[[R]]{{$}}
786@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
787define <8 x i16> @load_v8i16_from_global_address() {
788  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
789  ret <8 x i16> %v
790}
791
792; CHECK-LABEL: load_splat_v8i16_from_global_address:
793; NO-SIMD128-NOT: v128
794; SIMD128-NEXT: .functype load_splat_v8i16_from_global_address () -> (v128){{$}}
795; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
796; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, gv_i16($pop[[L0]]){{$}}
797; SIMD128-NEXT: return $pop[[R]]{{$}}
798@gv_i16 = global i16 42
799define <8 x i16> @load_splat_v8i16_from_global_address() {
800  %e = load i16, i16* @gv_i16
801  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
802  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
803  ret <8 x i16> %v2
804}
805
806; CHECK-LABEL: load_sext_v8i16_from_global_address:
807; NO-SIMD128-NOT: v128
808; SIMD128-NEXT: .functype load_sext_v8i16_from_global_address () -> (v128){{$}}
809; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
810; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}}
811; SIMD128-NEXT: return $pop[[R]]{{$}}
812@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
813define <8 x i16> @load_sext_v8i16_from_global_address() {
814  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
815  %v2 = sext <8 x i8> %v to <8 x i16>
816  ret <8 x i16> %v2
817}
818
819; CHECK-LABEL: load_zext_v8i16_from_global_address:
820; NO-SIMD128-NOT: v128
821; SIMD128-NEXT: .functype load_zext_v8i16_from_global_address () -> (v128){{$}}
822; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
823; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}}
824; SIMD128-NEXT: return $pop[[R]]{{$}}
825define <8 x i16> @load_zext_v8i16_from_global_address() {
826  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
827  %v2 = zext <8 x i8> %v to <8 x i16>
828  ret <8 x i16> %v2
829}
830
831; CHECK-LABEL: load_ext_v8i16_from_global_address:
832; NO-SIMD128-NOT: load8x8
833; SIMD128-NEXT: .functype load_ext_v8i16_from_global_address () -> (v128){{$}}
834; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
835; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}}
836; SIMD128-NEXT: return $pop[[R]]{{$}}
837define <8 x i8> @load_ext_v8i16_from_global_address() {
838  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
839  ret <8 x i8> %v
840}
841
842
843; CHECK-LABEL: store_v8i16:
844; NO-SIMD128-NOT: v128
845; SIMD128-NEXT: .functype store_v8i16 (v128, i32) -> (){{$}}
846; SIMD128-NEXT: v128.store 0($1), $0{{$}}
847define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
848  store <8 x i16> %v , <8 x i16>* %p
849  ret void
850}
851
852; CHECK-LABEL: store_v8i16_with_folded_offset:
853; NO-SIMD128-NOT: v128
854; SIMD128-NEXT: .functype store_v8i16_with_folded_offset (v128, i32) -> (){{$}}
855; SIMD128-NEXT: v128.store 16($1), $0{{$}}
856define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
857  %q = ptrtoint <8 x i16>* %p to i32
858  %r = add nuw i32 %q, 16
859  %s = inttoptr i32 %r to <8 x i16>*
860  store <8 x i16> %v , <8 x i16>* %s
861  ret void
862}
863
864; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
865; NO-SIMD128-NOT: v128
866; SIMD128-NEXT: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> (){{$}}
867; SIMD128-NEXT: v128.store 16($1), $0{{$}}
868define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
869  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
870  store <8 x i16> %v , <8 x i16>* %s
871  ret void
872}
873
874; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
875; NO-SIMD128-NOT: v128
876; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
877; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
878; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
879; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
880define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
881  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
882  store <8 x i16> %v , <8 x i16>* %s
883  ret void
884}
885
886; CHECK-LABEL: store_v8i16_with_unfolded_offset:
887; NO-SIMD128-NOT: v128
888; SIMD128-NEXT: .functype store_v8i16_with_unfolded_offset (v128, i32) -> (){{$}}
889; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
890; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
891; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
892define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
893  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
894  store <8 x i16> %v , <8 x i16>* %s
895  ret void
896}
897
898; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
899; NO-SIMD128-NOT: v128
900; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> (){{$}}
901; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
902; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
903; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
904define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
905  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
906  store <8 x i16> %v , <8 x i16>* %s
907  ret void
908}
909
910; CHECK-LABEL: store_v8i16_to_numeric_address:
911; NO-SIMD128-NOT: v128
912; SIMD128-NEXT: .functype store_v8i16_to_numeric_address (v128) -> (){{$}}
913; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
914; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
915define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
916  %s = inttoptr i32 32 to <8 x i16>*
917  store <8 x i16> %v , <8 x i16>* %s
918  ret void
919}
920
921; CHECK-LABEL: store_v8i16_to_global_address:
922; NO-SIMD128-NOT: v128
923; SIMD128-NEXT: .functype store_v8i16_to_global_address (v128) -> (){{$}}
924; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
925; SIMD128-NEXT: v128.store gv_v8i16($pop[[R]]), $0{{$}}
926define void @store_v8i16_to_global_address(<8 x i16> %v) {
927  store <8 x i16> %v , <8 x i16>* @gv_v8i16
928  ret void
929}
930
931; ==============================================================================
932; 4 x i32
933; ==============================================================================
934; CHECK-LABEL: load_v4i32:
935; NO-SIMD128-NOT: v128
936; SIMD128-NEXT: .functype load_v4i32 (i32) -> (v128){{$}}
937; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
938; SIMD128-NEXT: return $pop[[R]]{{$}}
939define <4 x i32> @load_v4i32(<4 x i32>* %p) {
940  %v = load <4 x i32>, <4 x i32>* %p
941  ret <4 x i32> %v
942}
943
944; CHECK-LABEL: load_splat_v4i32:
945; NO-SIMD128-NOT: v128
946; SIMD128-NEXT: .functype load_splat_v4i32 (i32) -> (v128){{$}}
947; SIMD128-NEXT: v32x4.load_splat
948define <4 x i32> @load_splat_v4i32(i32* %addr) {
949  %e = load i32, i32* %addr, align 4
950  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
951  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
952  ret <4 x i32> %v2
953}
954
955; CHECK-LABEL: load_sext_v4i32:
956; NO-SIMD128-NOT: v128
957; SIMD128-NEXT: .functype load_sext_v4i32 (i32) -> (v128){{$}}
958; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($0){{$}}
959; SIMD128-NEXT: return $pop[[R]]{{$}}
960define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
961  %v = load <4 x i16>, <4 x i16>* %p
962  %v2 = sext <4 x i16> %v to <4 x i32>
963  ret <4 x i32> %v2
964}
965
966; CHECK-LABEL: load_zext_v4i32:
967; NO-SIMD128-NOT: v128
968; SIMD128-NEXT: .functype load_zext_v4i32 (i32) -> (v128){{$}}
969; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($0){{$}}
970; SIMD128-NEXT: return $pop[[R]]{{$}}
971define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
972  %v = load <4 x i16>, <4 x i16>* %p
973  %v2 = zext <4 x i16> %v to <4 x i32>
974  ret <4 x i32> %v2
975}
976
977; CHECK-LABEL: load_ext_v4i32:
978; NO-SIMD128-NOT: load16x4
979; SIMD128-NEXT: .functype load_ext_v4i32 (i32) -> (v128){{$}}
980; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($0){{$}}
981; SIMD128-NEXT: return $pop[[R]]{{$}}
982define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
983  %v = load <4 x i16>, <4 x i16>* %p
984  ret <4 x i16> %v
985}
986
987; CHECK-LABEL: load_v4i32_with_folded_offset:
988; NO-SIMD128-NOT: v128
989; SIMD128-NEXT: .functype load_v4i32_with_folded_offset (i32) -> (v128){{$}}
990; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
991; SIMD128-NEXT: return $pop[[R]]{{$}}
992define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
993  %q = ptrtoint <4 x i32>* %p to i32
994  %r = add nuw i32 %q, 16
995  %s = inttoptr i32 %r to <4 x i32>*
996  %v = load <4 x i32>, <4 x i32>* %s
997  ret <4 x i32> %v
998}
999
1000; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
1001; NO-SIMD128-NOT: v128
1002; SIMD128-NEXT: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128){{$}}
1003; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
1004; SIMD128-NEXT: return $pop[[R]]{{$}}
1005define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
1006  %q = ptrtoint i32* %p to i32
1007  %r = add nuw i32 %q, 16
1008  %s = inttoptr i32 %r to i32*
1009  %e = load i32, i32* %s
1010  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1011  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1012  ret <4 x i32> %v2
1013}
1014
1015; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
1016; NO-SIMD128-NOT: v128
1017; SIMD128-NEXT: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128){{$}}
1018; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 16($0){{$}}
1019; SIMD128-NEXT: return $pop[[R]]{{$}}
1020define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
1021  %q = ptrtoint <4 x i16>* %p to i32
1022  %r = add nuw i32 %q, 16
1023  %s = inttoptr i32 %r to <4 x i16>*
1024  %v = load <4 x i16>, <4 x i16>* %s
1025  %v2 = sext <4 x i16> %v to <4 x i32>
1026  ret <4 x i32> %v2
1027}
1028
1029; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
1030; NO-SIMD128-NOT: v128
1031; SIMD128-NEXT: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128){{$}}
1032; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 16($0){{$}}
1033; SIMD128-NEXT: return $pop[[R]]{{$}}
1034define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
1035  %q = ptrtoint <4 x i16>* %p to i32
1036  %r = add nuw i32 %q, 16
1037  %s = inttoptr i32 %r to <4 x i16>*
1038  %v = load <4 x i16>, <4 x i16>* %s
1039  %v2 = zext <4 x i16> %v to <4 x i32>
1040  ret <4 x i32> %v2
1041}
1042
1043; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
1044; NO-SIMD128-NOT: load16x4
1045; SIMD128-NEXT: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128){{$}}
1046; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 16($0){{$}}
1047; SIMD128-NEXT: return $pop[[R]]{{$}}
1048define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
1049  %q = ptrtoint <4 x i16>* %p to i32
1050  %r = add nuw i32 %q, 16
1051  %s = inttoptr i32 %r to <4 x i16>*
1052  %v = load <4 x i16>, <4 x i16>* %s
1053  ret <4 x i16> %v
1054}
1055
1056; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
1057; NO-SIMD128-NOT: v128
1058; SIMD128-NEXT: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
1059; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
1060; SIMD128-NEXT: return $pop[[R]]{{$}}
1061define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
1062  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
1063  %v = load <4 x i32>, <4 x i32>* %s
1064  ret <4 x i32> %v
1065}
1066
1067; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
1068; NO-SIMD128-NOT: v128
1069; SIMD128-NEXT: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
1070; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 4($0){{$}}
1071; SIMD128-NEXT: return $pop[[R]]{{$}}
1072define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
1073  %s = getelementptr inbounds i32, i32* %p, i32 1
1074  %e = load i32, i32* %s
1075  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1076  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1077  ret <4 x i32> %v2
1078}
1079
1080; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
1081; NO-SIMD128-NOT: v128
1082; SIMD128-NEXT: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
1083; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 8($0){{$}}
1084; SIMD128-NEXT: return $pop[[R]]{{$}}
1085define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1086  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1087  %v = load <4 x i16>, <4 x i16>* %s
1088  %v2 = sext <4 x i16> %v to <4 x i32>
1089  ret <4 x i32> %v2
1090}
1091
1092; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
1093; NO-SIMD128-NOT: v128
1094; SIMD128-NEXT: .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
1095; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 8($0){{$}}
1096; SIMD128-NEXT: return $pop[[R]]{{$}}
1097define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1098  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1099  %v = load <4 x i16>, <4 x i16>* %s
1100  %v2 = zext <4 x i16> %v to <4 x i32>
1101  ret <4 x i32> %v2
1102}
1103
1104; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
1105; NO-SIMD128-NOT: load16x4
1106; SIMD128-NEXT: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
1107; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 8($0){{$}}
1108; SIMD128-NEXT: return $pop[[R]]{{$}}
1109define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1110  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1111  %v = load <4 x i16>, <4 x i16>* %s
1112  ret <4 x i16> %v
1113}
1114
1115; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
1116; NO-SIMD128-NOT: v128
1117; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1118; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1119; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1120; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1121; SIMD128-NEXT: return $pop[[R]]{{$}}
1122define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
1123  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1124  %v = load <4 x i32>, <4 x i32>* %s
1125  ret <4 x i32> %v
1126}
1127
1128; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
1129; NO-SIMD128-NOT: v128
1130; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1131; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -4{{$}}
1132; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1133; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1134; SIMD128-NEXT: return $pop[[R]]{{$}}
1135define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
1136  %s = getelementptr inbounds i32, i32* %p, i32 -1
1137  %e = load i32, i32* %s
1138  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1139  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1140  ret <4 x i32> %v2
1141}
1142
1143; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
1144; NO-SIMD128-NOT: v128
1145; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1146; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1147; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1148; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1149; SIMD128-NEXT: return $pop[[R]]{{$}}
1150define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1151  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1152  %v = load <4 x i16>, <4 x i16>* %s
1153  %v2 = sext <4 x i16> %v to <4 x i32>
1154  ret <4 x i32> %v2
1155}
1156
1157; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
1158; NO-SIMD128-NOT: v128
1159; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1160; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1161; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1162; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1163; SIMD128-NEXT: return $pop[[R]]{{$}}
1164define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1165  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1166  %v = load <4 x i16>, <4 x i16>* %s
1167  %v2 = zext <4 x i16> %v to <4 x i32>
1168  ret <4 x i32> %v2
1169}
1170
1171; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
1172; NO-SIMD128-NOT: load16x4
1173; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1174; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1175; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1176; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1177; SIMD128-NEXT: return $pop[[R]]{{$}}
1178define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1179  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1180  %v = load <4 x i16>, <4 x i16>* %s
1181  ret <4 x i16> %v
1182}
1183
1184; CHECK-LABEL: load_v4i32_with_unfolded_offset:
1185; NO-SIMD128-NOT: v128
1186; SIMD128-NEXT: .functype load_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
1187; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1188; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1189; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1190; SIMD128-NEXT: return $pop[[R]]{{$}}
1191define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
1192  %q = ptrtoint <4 x i32>* %p to i32
1193  %r = add nsw i32 %q, 16
1194  %s = inttoptr i32 %r to <4 x i32>*
1195  %v = load <4 x i32>, <4 x i32>* %s
1196  ret <4 x i32> %v
1197}
1198
1199; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
1200; NO-SIMD128-NOT: v128
1201; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
1202; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1203; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1204; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1205; SIMD128-NEXT: return $pop[[R]]{{$}}
1206define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
1207  %q = ptrtoint i32* %p to i32
1208  %r = add nsw i32 %q, 16
1209  %s = inttoptr i32 %r to i32*
1210  %e = load i32, i32* %s
1211  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1212  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1213  ret <4 x i32> %v2
1214}
1215
1216; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
1217; NO-SIMD128-NOT: v128
1218; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
1219; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1220; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1221; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1222; SIMD128-NEXT: return $pop[[R]]{{$}}
1223define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1224  %q = ptrtoint <4 x i16>* %p to i32
1225  %r = add nsw i32 %q, 16
1226  %s = inttoptr i32 %r to <4 x i16>*
1227  %v = load <4 x i16>, <4 x i16>* %s
1228  %v2 = sext <4 x i16> %v to <4 x i32>
1229  ret <4 x i32> %v2
1230}
1231
1232; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
1233; NO-SIMD128-NOT: v128
1234; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
1235; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1236; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1237; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1238; SIMD128-NEXT: return $pop[[R]]{{$}}
1239define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1240  %q = ptrtoint <4 x i16>* %p to i32
1241  %r = add nsw i32 %q, 16
1242  %s = inttoptr i32 %r to <4 x i16>*
1243  %v = load <4 x i16>, <4 x i16>* %s
1244  %v2 = zext <4 x i16> %v to <4 x i32>
1245  ret <4 x i32> %v2
1246}
1247
1248; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
1249; NO-SIMD128-NOT: load16x4
1250; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
1251; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1252; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1253; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1254; SIMD128-NEXT: return $pop[[R]]{{$}}
1255define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1256  %q = ptrtoint <4 x i16>* %p to i32
1257  %r = add nsw i32 %q, 16
1258  %s = inttoptr i32 %r to <4 x i16>*
1259  %v = load <4 x i16>, <4 x i16>* %s
1260  ret <4 x i16> %v
1261}
1262
1263; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
1264; NO-SIMD128-NOT: v128
1265; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1266; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1267; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1268; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1269; SIMD128-NEXT: return $pop[[R]]{{$}}
1270define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
1271  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
1272  %v = load <4 x i32>, <4 x i32>* %s
1273  ret <4 x i32> %v
1274}
1275
1276; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
1277; NO-SIMD128-NOT: v128
1278; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1279; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 4{{$}}
1280; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1281; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1282; SIMD128-NEXT: return $pop[[R]]{{$}}
1283define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
1284  %s = getelementptr i32, i32* %p, i32 1
1285  %e = load i32, i32* %s
1286  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1287  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1288  ret <4 x i32> %v2
1289}
1290
1291; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
1292; NO-SIMD128-NOT: v128
1293; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1294; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1295; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1296; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1297; SIMD128-NEXT: return $pop[[R]]{{$}}
1298define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1299  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1300  %v = load <4 x i16>, <4 x i16>* %s
1301  %v2 = sext <4 x i16> %v to <4 x i32>
1302  ret <4 x i32> %v2
1303}
1304
1305; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
1306; NO-SIMD128-NOT: v128
1307; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1308; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1309; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1310; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1311; SIMD128-NEXT: return $pop[[R]]{{$}}
1312define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1313  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1314  %v = load <4 x i16>, <4 x i16>* %s
1315  %v2 = zext <4 x i16> %v to <4 x i32>
1316  ret <4 x i32> %v2
1317}
1318
1319; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
1320; NO-SIMD128-NOT: load16x4
1321; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1322; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1323; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1324; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1325; SIMD128-NEXT: return $pop[[R]]{{$}}
1326define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1327  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1328  %v = load <4 x i16>, <4 x i16>* %s
1329  ret <4 x i16> %v
1330}
1331
1332; CHECK-LABEL: load_v4i32_from_numeric_address:
1333; NO-SIMD128-NOT: v128
1334; SIMD128-NEXT: .functype load_v4i32_from_numeric_address () -> (v128){{$}}
1335; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1336; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1337; SIMD128-NEXT: return $pop[[R]]{{$}}
1338define <4 x i32> @load_v4i32_from_numeric_address() {
1339  %s = inttoptr i32 32 to <4 x i32>*
1340  %v = load <4 x i32>, <4 x i32>* %s
1341  ret <4 x i32> %v
1342}
1343
1344; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
1345; NO-SIMD128-NOT: v128
1346; SIMD128-NEXT: .functype load_splat_v4i32_from_numeric_address () -> (v128){{$}}
1347; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1348; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1349; SIMD128-NEXT: return $pop[[R]]{{$}}
1350define <4 x i32> @load_splat_v4i32_from_numeric_address() {
1351  %s = inttoptr i32 32 to i32*
1352  %e = load i32, i32* %s
1353  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1354  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1355  ret <4 x i32> %v2
1356}
1357
1358; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
1359; NO-SIMD128-NOT: v128
1360; SIMD128-NEXT: .functype load_sext_v4i32_from_numeric_address () -> (v128){{$}}
1361; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1362; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1363; SIMD128-NEXT: return $pop[[R]]{{$}}
1364define <4 x i32> @load_sext_v4i32_from_numeric_address() {
1365  %s = inttoptr i32 32 to <4 x i16>*
1366  %v = load <4 x i16>, <4 x i16>* %s
1367  %v2 = sext <4 x i16> %v to <4 x i32>
1368  ret <4 x i32> %v2
1369}
1370
1371; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
1372; NO-SIMD128-NOT: v128
1373; SIMD128-NEXT: .functype load_zext_v4i32_from_numeric_address () -> (v128){{$}}
1374; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1375; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1376; SIMD128-NEXT: return $pop[[R]]{{$}}
1377define <4 x i32> @load_zext_v4i32_from_numeric_address() {
1378  %s = inttoptr i32 32 to <4 x i16>*
1379  %v = load <4 x i16>, <4 x i16>* %s
1380  %v2 = zext <4 x i16> %v to <4 x i32>
1381  ret <4 x i32> %v2
1382}
1383
1384; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
1385; NO-SIMD128-NOT: load16x4
1386; SIMD128-NEXT: .functype load_ext_v4i32_from_numeric_address () -> (v128){{$}}
1387; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1388; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1389; SIMD128-NEXT: return $pop[[R]]{{$}}
1390define <4 x i16> @load_ext_v4i32_from_numeric_address() {
1391  %s = inttoptr i32 32 to <4 x i16>*
1392  %v = load <4 x i16>, <4 x i16>* %s
1393  ret <4 x i16> %v
1394}
1395
1396; CHECK-LABEL: load_v4i32_from_global_address:
1397; NO-SIMD128-NOT: v128
1398; SIMD128-NEXT: .functype load_v4i32_from_global_address () -> (v128){{$}}
1399; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1400; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4i32($pop[[L0]]){{$}}
1401; SIMD128-NEXT: return $pop[[R]]{{$}}
1402@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
1403define <4 x i32> @load_v4i32_from_global_address() {
1404  %v = load <4 x i32>, <4 x i32>* @gv_v4i32
1405  ret <4 x i32> %v
1406}
1407
1408; CHECK-LABEL: load_splat_v4i32_from_global_address:
1409; NO-SIMD128-NOT: v128
1410; SIMD128-NEXT: .functype load_splat_v4i32_from_global_address () -> (v128){{$}}
1411; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1412; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, gv_i32($pop[[L0]]){{$}}
1413; SIMD128-NEXT: return $pop[[R]]{{$}}
1414@gv_i32 = global i32 42
1415define <4 x i32> @load_splat_v4i32_from_global_address() {
1416  %e = load i32, i32* @gv_i32
1417  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1418  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1419  ret <4 x i32> %v2
1420}
1421
1422; CHECK-LABEL: load_sext_v4i32_from_global_address:
1423; NO-SIMD128-NOT: v128
1424; SIMD128-NEXT: .functype load_sext_v4i32_from_global_address () -> (v128){{$}}
1425; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1426; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}}
1427; SIMD128-NEXT: return $pop[[R]]{{$}}
1428@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
1429define <4 x i32> @load_sext_v4i32_from_global_address() {
1430  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1431  %v2 = sext <4 x i16> %v to <4 x i32>
1432  ret <4 x i32> %v2
1433}
1434
1435; CHECK-LABEL: load_zext_v4i32_from_global_address:
1436; NO-SIMD128-NOT: v128
1437; SIMD128-NEXT: .functype load_zext_v4i32_from_global_address () -> (v128){{$}}
1438; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1439; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}}
1440; SIMD128-NEXT: return $pop[[R]]{{$}}
1441define <4 x i32> @load_zext_v4i32_from_global_address() {
1442  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1443  %v2 = zext <4 x i16> %v to <4 x i32>
1444  ret <4 x i32> %v2
1445}
1446
1447; CHECK-LABEL: load_ext_v4i32_from_global_address:
1448; NO-SIMD128-NOT: load16x4
1449; SIMD128-NEXT: .functype load_ext_v4i32_from_global_address () -> (v128){{$}}
1450; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1451; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}}
1452; SIMD128-NEXT: return $pop[[R]]{{$}}
1453define <4 x i16> @load_ext_v4i32_from_global_address() {
1454  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1455  ret <4 x i16> %v
1456}
1457
1458; CHECK-LABEL: store_v4i32:
1459; NO-SIMD128-NOT: v128
1460; SIMD128-NEXT: .functype store_v4i32 (v128, i32) -> (){{$}}
1461; SIMD128-NEXT: v128.store 0($1), $0{{$}}
1462define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
1463  store <4 x i32> %v , <4 x i32>* %p
1464  ret void
1465}
1466
1467; CHECK-LABEL: store_v4i32_with_folded_offset:
1468; NO-SIMD128-NOT: v128
1469; SIMD128-NEXT: .functype store_v4i32_with_folded_offset (v128, i32) -> (){{$}}
1470; SIMD128-NEXT: v128.store 16($1), $0{{$}}
1471define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
1472  %q = ptrtoint <4 x i32>* %p to i32
1473  %r = add nuw i32 %q, 16
1474  %s = inttoptr i32 %r to <4 x i32>*
1475  store <4 x i32> %v , <4 x i32>* %s
1476  ret void
1477}
1478
1479; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
1480; NO-SIMD128-NOT: v128
1481; SIMD128-NEXT: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> (){{$}}
1482; SIMD128-NEXT: v128.store 16($1), $0{{$}}
1483define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1484  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
1485  store <4 x i32> %v , <4 x i32>* %s
1486  ret void
1487}
1488
1489; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
1490; NO-SIMD128-NOT: v128
1491; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
1492; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1493; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1494; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1495define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
1496  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1497  store <4 x i32> %v , <4 x i32>* %s
1498  ret void
1499}
1500
1501; CHECK-LABEL: store_v4i32_with_unfolded_offset:
1502; NO-SIMD128-NOT: v128
1503; SIMD128-NEXT: .functype store_v4i32_with_unfolded_offset (v128, i32) -> (){{$}}
1504; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1505; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1506; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1507define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
1508  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1509  store <4 x i32> %v , <4 x i32>* %s
1510  ret void
1511}
1512
1513; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
1514; NO-SIMD128-NOT: v128
1515; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> (){{$}}
1516; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1517; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1518; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1519define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1520  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
1521  store <4 x i32> %v , <4 x i32>* %s
1522  ret void
1523}
1524
1525; CHECK-LABEL: store_v4i32_to_numeric_address:
1526; NO-SIMD128-NOT: v128
1527; SIMD128-NEXT: .functype store_v4i32_to_numeric_address (v128) -> (){{$}}
1528; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1529; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
1530define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
1531  %s = inttoptr i32 32 to <4 x i32>*
1532  store <4 x i32> %v , <4 x i32>* %s
1533  ret void
1534}
1535
1536; CHECK-LABEL: store_v4i32_to_global_address:
1537; NO-SIMD128-NOT: v128
1538; SIMD128-NEXT: .functype store_v4i32_to_global_address (v128) -> (){{$}}
1539; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
1540; SIMD128-NEXT: v128.store gv_v4i32($pop[[R]]), $0{{$}}
1541define void @store_v4i32_to_global_address(<4 x i32> %v) {
1542  store <4 x i32> %v , <4 x i32>* @gv_v4i32
1543  ret void
1544}
1545
1546; ==============================================================================
1547; 2 x i64
1548; ==============================================================================
1549; CHECK-LABEL: load_v2i64:
1550; NO-SIMD128-NOT: v128
1551; SIMD128-NEXT: .functype load_v2i64 (i32) -> (v128){{$}}
1552; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
1553; SIMD128-NEXT: return $pop[[R]]{{$}}
1554define <2 x i64> @load_v2i64(<2 x i64>* %p) {
1555  %v = load <2 x i64>, <2 x i64>* %p
1556  ret <2 x i64> %v
1557}
1558
1559; CHECK-LABEL: load_splat_v2i64:
1560; NO-SIMD128-NOT: v128
1561; SIMD128-NEXT: .functype load_splat_v2i64 (i32) -> (v128){{$}}
1562; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
1563; SIMD128-NEXT: return $pop[[R]]{{$}}
1564define <2 x i64> @load_splat_v2i64(i64* %p) {
1565  %e = load i64, i64* %p
1566  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1567  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1568  ret <2 x i64> %v2
1569}
1570
1571; CHECK-LABEL: load_sext_v2i64:
1572; NO-SIMD128-NOT: v128
1573; SIMD128-NEXT: .functype load_sext_v2i64 (i32) -> (v128){{$}}
1574; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($0){{$}}
1575; SIMD128-NEXT: return $pop[[R]]{{$}}
1576define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
1577  %v = load <2 x i32>, <2 x i32>* %p
1578  %v2 = sext <2 x i32> %v to <2 x i64>
1579  ret <2 x i64> %v2
1580}
1581
1582; CHECK-LABEL: load_zext_v2i64:
1583; NO-SIMD128-NOT: v128
1584; SIMD128-NEXT: .functype load_zext_v2i64 (i32) -> (v128){{$}}
1585; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($0){{$}}
1586; SIMD128-NEXT: return $pop[[R]]{{$}}
1587define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
1588  %v = load <2 x i32>, <2 x i32>* %p
1589  %v2 = zext <2 x i32> %v to <2 x i64>
1590  ret <2 x i64> %v2
1591}
1592
1593; CHECK-LABEL: load_ext_v2i64:
1594; NO-SIMD128-NOT: v128
1595; SIMD128-NEXT: .functype load_ext_v2i64 (i32) -> (v128){{$}}
1596; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($0){{$}}
1597; SIMD128-NEXT: return $pop[[R]]{{$}}
1598define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
1599  %v = load <2 x i32>, <2 x i32>* %p
1600  ret <2 x i32> %v
1601}
1602
1603; CHECK-LABEL: load_v2i64_with_folded_offset:
1604; NO-SIMD128-NOT: v128
1605; SIMD128-NEXT: .functype load_v2i64_with_folded_offset (i32) -> (v128){{$}}
1606; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
1607; SIMD128-NEXT: return $pop[[R]]{{$}}
1608define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
1609  %q = ptrtoint <2 x i64>* %p to i32
1610  %r = add nuw i32 %q, 16
1611  %s = inttoptr i32 %r to <2 x i64>*
1612  %v = load <2 x i64>, <2 x i64>* %s
1613  ret <2 x i64> %v
1614}
1615
1616; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
1617; NO-SIMD128-NOT: v128
1618; SIMD128-NEXT: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128){{$}}
1619; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
1620; SIMD128-NEXT: return $pop[[R]]{{$}}
1621define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
1622  %q = ptrtoint i64* %p to i32
1623  %r = add nuw i32 %q, 16
1624  %s = inttoptr i32 %r to i64*
1625  %e = load i64, i64* %s
1626  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1627  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1628  ret <2 x i64> %v2
1629}
1630
1631; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
1632; NO-SIMD128-NOT: v128
1633; SIMD128-NEXT: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128){{$}}
1634; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 16($0){{$}}
1635; SIMD128-NEXT: return $pop[[R]]{{$}}
1636define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
1637  %q = ptrtoint <2 x i32>* %p to i32
1638  %r = add nuw i32 %q, 16
1639  %s = inttoptr i32 %r to <2 x i32>*
1640  %v = load <2 x i32>, <2 x i32>* %s
1641  %v2 = sext <2 x i32> %v to <2 x i64>
1642  ret <2 x i64> %v2
1643}
1644
1645; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
1646; NO-SIMD128-NOT: v128
1647; SIMD128-NEXT: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128){{$}}
1648; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 16($0){{$}}
1649; SIMD128-NEXT: return $pop[[R]]{{$}}
1650define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
1651  %q = ptrtoint <2 x i32>* %p to i32
1652  %r = add nuw i32 %q, 16
1653  %s = inttoptr i32 %r to <2 x i32>*
1654  %v = load <2 x i32>, <2 x i32>* %s
1655  %v2 = zext <2 x i32> %v to <2 x i64>
1656  ret <2 x i64> %v2
1657}
1658
1659; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
1660; NO-SIMD128-NOT: v128
1661; SIMD128-NEXT: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128){{$}}
1662; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 16($0){{$}}
1663; SIMD128-NEXT: return $pop[[R]]{{$}}
1664define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
1665  %q = ptrtoint <2 x i32>* %p to i32
1666  %r = add nuw i32 %q, 16
1667  %s = inttoptr i32 %r to <2 x i32>*
1668  %v = load <2 x i32>, <2 x i32>* %s
1669  ret <2 x i32> %v
1670}
1671
1672; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
1673; NO-SIMD128-NOT: v128
1674; SIMD128-NEXT: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
1675; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
1676; SIMD128-NEXT: return $pop[[R]]{{$}}
1677define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
1678  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
1679  %v = load <2 x i64>, <2 x i64>* %s
1680  ret <2 x i64> %v
1681}
1682
1683; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
1684; NO-SIMD128-NOT: v128
1685; SIMD128-NEXT: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
1686; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 8($0){{$}}
1687; SIMD128-NEXT: return $pop[[R]]{{$}}
1688define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
1689  %s = getelementptr inbounds i64, i64* %p, i32 1
1690  %e = load i64, i64* %s
1691  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1692  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1693  ret <2 x i64> %v2
1694}
1695
1696; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
1697; NO-SIMD128-NOT: v128
1698; SIMD128-NEXT: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
1699; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 8($0){{$}}
1700; SIMD128-NEXT: return $pop[[R]]{{$}}
1701define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
1702  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
1703  %v = load <2 x i32>, <2 x i32>* %s
1704  %v2 = sext <2 x i32> %v to <2 x i64>
1705  ret <2 x i64> %v2
1706}
1707
1708; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
1709; NO-SIMD128-NOT: v128
1710; SIMD128-NEXT: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
1711; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 8($0){{$}}
1712; SIMD128-NEXT: return $pop[[R]]{{$}}
1713define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
1714  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
1715  %v = load <2 x i32>, <2 x i32>* %s
1716  %v2 = zext <2 x i32> %v to <2 x i64>
1717  ret <2 x i64> %v2
1718}
1719
1720; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
1721; NO-SIMD128-NOT: v128
1722; SIMD128-NEXT: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
1723; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 8($0){{$}}
1724; SIMD128-NEXT: return $pop[[R]]{{$}}
1725define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
1726  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
1727  %v = load <2 x i32>, <2 x i32>* %s
1728  ret <2 x i32> %v
1729}
1730
1731; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
1732; NO-SIMD128-NOT: v128
1733; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1734; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1735; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1736; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1737; SIMD128-NEXT: return $pop[[R]]{{$}}
1738define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
1739  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
1740  %v = load <2 x i64>, <2 x i64>* %s
1741  ret <2 x i64> %v
1742}
1743
1744; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
1745; NO-SIMD128-NOT: v128
1746; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1747; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1748; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1749; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1750; SIMD128-NEXT: return $pop[[R]]{{$}}
1751define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
1752  %s = getelementptr inbounds i64, i64* %p, i32 -1
1753  %e = load i64, i64* %s
1754  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1755  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1756  ret <2 x i64> %v2
1757}
1758
1759; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
1760; NO-SIMD128-NOT: v128
1761; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1762; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1763; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1764; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1765; SIMD128-NEXT: return $pop[[R]]{{$}}
1766define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
1767  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
1768  %v = load <2 x i32>, <2 x i32>* %s
1769  %v2 = sext <2 x i32> %v to <2 x i64>
1770  ret <2 x i64> %v2
1771}
1772
1773; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
1774; NO-SIMD128-NOT: v128
1775; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1776; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1777; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1778; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1779; SIMD128-NEXT: return $pop[[R]]{{$}}
1780define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
1781  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
1782  %v = load <2 x i32>, <2 x i32>* %s
1783  %v2 = zext <2 x i32> %v to <2 x i64>
1784  ret <2 x i64> %v2
1785}
1786
1787; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
1788; NO-SIMD128-NOT: v128
1789; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1790; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1791; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1792; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1793; SIMD128-NEXT: return $pop[[R]]{{$}}
1794define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
1795  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
1796  %v = load <2 x i32>, <2 x i32>* %s
1797  ret <2 x i32> %v
1798}
1799
1800; CHECK-LABEL: load_v2i64_with_unfolded_offset:
1801; NO-SIMD128-NOT: v128
1802; SIMD128-NEXT: .functype load_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1803; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1804; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1805; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1806; SIMD128-NEXT: return $pop[[R]]{{$}}
1807define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
1808  %q = ptrtoint <2 x i64>* %p to i32
1809  %r = add nsw i32 %q, 16
1810  %s = inttoptr i32 %r to <2 x i64>*
1811  %v = load <2 x i64>, <2 x i64>* %s
1812  ret <2 x i64> %v
1813}
1814
1815; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
1816; NO-SIMD128-NOT: v128
1817; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1818; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1819; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1820; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1821; SIMD128-NEXT: return $pop[[R]]{{$}}
1822define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
1823  %q = ptrtoint i64* %p to i32
1824  %r = add nsw i32 %q, 16
1825  %s = inttoptr i32 %r to i64*
1826  %e = load i64, i64* %s
1827  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1828  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1829  ret <2 x i64> %v2
1830}
1831
1832; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
1833; NO-SIMD128-NOT: v128
1834; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1835; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1836; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1837; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1838; SIMD128-NEXT: return $pop[[R]]{{$}}
1839define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
1840  %q = ptrtoint <2 x i32>* %p to i32
1841  %r = add nsw i32 %q, 16
1842  %s = inttoptr i32 %r to <2 x i32>*
1843  %v = load <2 x i32>, <2 x i32>* %s
1844  %v2 = sext <2 x i32> %v to <2 x i64>
1845  ret <2 x i64> %v2
1846}
1847
1848; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
1849; NO-SIMD128-NOT: v128
1850; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1851; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1852; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1853; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1854; SIMD128-NEXT: return $pop[[R]]{{$}}
1855define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
1856  %q = ptrtoint <2 x i32>* %p to i32
1857  %r = add nsw i32 %q, 16
1858  %s = inttoptr i32 %r to <2 x i32>*
1859  %v = load <2 x i32>, <2 x i32>* %s
1860  %v2 = zext <2 x i32> %v to <2 x i64>
1861  ret <2 x i64> %v2
1862}
1863
1864; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
1865; NO-SIMD128-NOT: v128
1866; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1867; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1868; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1869; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1870; SIMD128-NEXT: return $pop[[R]]{{$}}
1871define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
1872  %q = ptrtoint <2 x i32>* %p to i32
1873  %r = add nsw i32 %q, 16
1874  %s = inttoptr i32 %r to <2 x i32>*
1875  %v = load <2 x i32>, <2 x i32>* %s
1876  ret <2 x i32> %v
1877}
1878
1879; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
1880; NO-SIMD128-NOT: v128
1881; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1882; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1883; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1884; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1885; SIMD128-NEXT: return $pop[[R]]{{$}}
1886define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
1887  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
1888  %v = load <2 x i64>, <2 x i64>* %s
1889  ret <2 x i64> %v
1890}
1891
1892; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
1893; NO-SIMD128-NOT: v128
1894; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1895; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1896; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1897; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1898; SIMD128-NEXT: return $pop[[R]]{{$}}
1899define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
1900  %s = getelementptr i64, i64* %p, i32 1
1901  %e = load i64, i64* %s
1902  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1903  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1904  ret <2 x i64> %v2
1905}
1906
1907; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
1908; NO-SIMD128-NOT: v128
1909; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1910; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1911; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1912; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1913; SIMD128-NEXT: return $pop[[R]]{{$}}
1914define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
1915  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
1916  %v = load <2 x i32>, <2 x i32>* %s
1917  %v2 = sext <2 x i32> %v to <2 x i64>
1918  ret <2 x i64> %v2
1919}
1920
1921; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
1922; NO-SIMD128-NOT: v128
1923; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1924; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1925; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1926; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1927; SIMD128-NEXT: return $pop[[R]]{{$}}
1928define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
1929  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
1930  %v = load <2 x i32>, <2 x i32>* %s
1931  %v2 = zext <2 x i32> %v to <2 x i64>
1932  ret <2 x i64> %v2
1933}
1934
1935; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
1936; NO-SIMD128-NOT: v128
1937; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1938; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1939; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1940; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1941; SIMD128-NEXT: return $pop[[R]]{{$}}
1942define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
1943  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
1944  %v = load <2 x i32>, <2 x i32>* %s
1945  ret <2 x i32> %v
1946}
1947
1948; CHECK-LABEL: load_v2i64_from_numeric_address:
1949; NO-SIMD128-NOT: v128
1950; SIMD128-NEXT: .functype load_v2i64_from_numeric_address () -> (v128){{$}}
1951; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1952; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1953; SIMD128-NEXT: return $pop[[R]]{{$}}
1954define <2 x i64> @load_v2i64_from_numeric_address() {
1955  %s = inttoptr i32 32 to <2 x i64>*
1956  %v = load <2 x i64>, <2 x i64>* %s
1957  ret <2 x i64> %v
1958}
1959
1960; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
1961; NO-SIMD128-NOT: v128
1962; SIMD128-NEXT: .functype load_splat_v2i64_from_numeric_address () -> (v128){{$}}
1963; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1964; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1965; SIMD128-NEXT: return $pop[[R]]{{$}}
1966define <2 x i64> @load_splat_v2i64_from_numeric_address() {
1967  %s = inttoptr i32 32 to i64*
1968  %e = load i64, i64* %s
1969  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1970  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1971  ret <2 x i64> %v2
1972}
1973
1974; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
1975; NO-SIMD128-NOT: v128
1976; SIMD128-NEXT: .functype load_sext_v2i64_from_numeric_address () -> (v128){{$}}
1977; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1978; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1979; SIMD128-NEXT: return $pop[[R]]{{$}}
1980define <2 x i64> @load_sext_v2i64_from_numeric_address() {
1981  %s = inttoptr i32 32 to <2 x i32>*
1982  %v = load <2 x i32>, <2 x i32>* %s
1983  %v2 = sext <2 x i32> %v to <2 x i64>
1984  ret <2 x i64> %v2
1985}
1986
1987; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
1988; NO-SIMD128-NOT: v128
1989; SIMD128-NEXT: .functype load_zext_v2i64_from_numeric_address () -> (v128){{$}}
1990; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1991; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1992; SIMD128-NEXT: return $pop[[R]]{{$}}
1993define <2 x i64> @load_zext_v2i64_from_numeric_address() {
1994  %s = inttoptr i32 32 to <2 x i32>*
1995  %v = load <2 x i32>, <2 x i32>* %s
1996  %v2 = zext <2 x i32> %v to <2 x i64>
1997  ret <2 x i64> %v2
1998}
1999
2000; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
2001; NO-SIMD128-NOT: v128
2002; SIMD128-NEXT: .functype load_ext_v2i64_from_numeric_address () -> (v128){{$}}
2003; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2004; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2005; SIMD128-NEXT: return $pop[[R]]{{$}}
2006define <2 x i32> @load_ext_v2i64_from_numeric_address() {
2007  %s = inttoptr i32 32 to <2 x i32>*
2008  %v = load <2 x i32>, <2 x i32>* %s
2009  ret <2 x i32> %v
2010}
2011
2012; CHECK-LABEL: load_v2i64_from_global_address:
2013; NO-SIMD128-NOT: v128
2014; SIMD128-NEXT: .functype load_v2i64_from_global_address () -> (v128){{$}}
2015; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2016; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2i64($pop[[L0]]){{$}}
2017; SIMD128-NEXT: return $pop[[R]]{{$}}
2018@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
2019define <2 x i64> @load_v2i64_from_global_address() {
2020  %v = load <2 x i64>, <2 x i64>* @gv_v2i64
2021  ret <2 x i64> %v
2022}
2023
2024; CHECK-LABEL: load_splat_v2i64_from_global_address:
2025; NO-SIMD128-NOT: v128
2026; SIMD128-NEXT: .functype load_splat_v2i64_from_global_address () -> (v128){{$}}
2027; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2028; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, gv_i64($pop[[L0]]){{$}}
2029; SIMD128-NEXT: return $pop[[R]]{{$}}
2030@gv_i64 = global i64 42
2031define <2 x i64> @load_splat_v2i64_from_global_address() {
2032  %e = load i64, i64* @gv_i64
2033  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2034  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2035  ret <2 x i64> %v2
2036}
2037
2038; CHECK-LABEL: load_sext_v2i64_from_global_address:
2039; NO-SIMD128-NOT: v128
2040; SIMD128-NEXT: .functype load_sext_v2i64_from_global_address () -> (v128){{$}}
2041; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2042; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}}
2043; SIMD128-NEXT: return $pop[[R]]{{$}}
2044@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
2045define <2 x i64> @load_sext_v2i64_from_global_address() {
2046  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2047  %v2 = sext <2 x i32> %v to <2 x i64>
2048  ret <2 x i64> %v2
2049}
2050
2051; CHECK-LABEL: load_zext_v2i64_from_global_address:
2052; NO-SIMD128-NOT: v128
2053; SIMD128-NEXT: .functype load_zext_v2i64_from_global_address () -> (v128){{$}}
2054; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2055; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}}
2056; SIMD128-NEXT: return $pop[[R]]{{$}}
2057define <2 x i64> @load_zext_v2i64_from_global_address() {
2058  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2059  %v2 = zext <2 x i32> %v to <2 x i64>
2060  ret <2 x i64> %v2
2061}
2062
2063; CHECK-LABEL: load_ext_v2i64_from_global_address:
2064; NO-SIMD128-NOT: v128
2065; SIMD128-NEXT: .functype load_ext_v2i64_from_global_address () -> (v128){{$}}
2066; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2067; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}}
2068; SIMD128-NEXT: return $pop[[R]]{{$}}
2069define <2 x i32> @load_ext_v2i64_from_global_address() {
2070  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2071  ret <2 x i32> %v
2072}
2073
2074; CHECK-LABEL: store_v2i64:
2075; NO-SIMD128-NOT: v128
2076; SIMD128-NEXT: .functype store_v2i64 (v128, i32) -> (){{$}}
2077; SIMD128-NEXT: v128.store 0($1), $0{{$}}
2078define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
2079  store <2 x i64> %v , <2 x i64>* %p
2080  ret void
2081}
2082
2083; CHECK-LABEL: store_v2i64_with_folded_offset:
2084; NO-SIMD128-NOT: v128
2085; SIMD128-NEXT: .functype store_v2i64_with_folded_offset (v128, i32) -> (){{$}}
2086; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2087define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
2088  %q = ptrtoint <2 x i64>* %p to i32
2089  %r = add nuw i32 %q, 16
2090  %s = inttoptr i32 %r to <2 x i64>*
2091  store <2 x i64> %v , <2 x i64>* %s
2092  ret void
2093}
2094
2095; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
2096; NO-SIMD128-NOT: v128
2097; SIMD128-NEXT: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> (){{$}}
2098; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2099define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
2100  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
2101  store <2 x i64> %v , <2 x i64>* %s
2102  ret void
2103}
2104
2105; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
2106; NO-SIMD128-NOT: v128
2107; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
2108; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2109; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2110; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2111define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
2112  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
2113  store <2 x i64> %v , <2 x i64>* %s
2114  ret void
2115}
2116
2117; CHECK-LABEL: store_v2i64_with_unfolded_offset:
2118; NO-SIMD128-NOT: v128
2119; SIMD128-NEXT: .functype store_v2i64_with_unfolded_offset (v128, i32) -> (){{$}}
2120; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2121; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2122; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2123define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
2124  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
2125  store <2 x i64> %v , <2 x i64>* %s
2126  ret void
2127}
2128
2129; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
2130; NO-SIMD128-NOT: v128
2131; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> (){{$}}
2132; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2133; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2134; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2135define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
2136  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
2137  store <2 x i64> %v , <2 x i64>* %s
2138  ret void
2139}
2140
2141; CHECK-LABEL: store_v2i64_to_numeric_address:
2142; NO-SIMD128-NOT: v128
2143; SIMD128-NEXT: .functype store_v2i64_to_numeric_address (v128) -> (){{$}}
2144; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2145; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
2146define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
2147  %s = inttoptr i32 32 to <2 x i64>*
2148  store <2 x i64> %v , <2 x i64>* %s
2149  ret void
2150}
2151
2152; CHECK-LABEL: store_v2i64_to_global_address:
2153; NO-SIMD128-NOT: v128
2154; SIMD128-NEXT: .functype store_v2i64_to_global_address (v128) -> (){{$}}
2155; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
2156; SIMD128-NEXT: v128.store gv_v2i64($pop[[R]]), $0{{$}}
2157define void @store_v2i64_to_global_address(<2 x i64> %v) {
2158  store <2 x i64> %v , <2 x i64>* @gv_v2i64
2159  ret void
2160}
2161
2162; ==============================================================================
2163; 4 x float
2164; ==============================================================================
2165; CHECK-LABEL: load_v4f32:
2166; NO-SIMD128-NOT: v128
2167; SIMD128-NEXT: .functype load_v4f32 (i32) -> (v128){{$}}
2168; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
2169; SIMD128-NEXT: return $pop[[R]]{{$}}
2170define <4 x float> @load_v4f32(<4 x float>* %p) {
2171  %v = load <4 x float>, <4 x float>* %p
2172  ret <4 x float> %v
2173}
2174
2175; CHECK-LABEL: load_splat_v4f32:
2176; NO-SIMD128-NOT: v128
2177; SIMD128-NEXT: .functype load_splat_v4f32 (i32) -> (v128){{$}}
2178; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
2179; SIMD128-NEXT: return $pop[[R]]{{$}}
2180define <4 x float> @load_splat_v4f32(float* %p) {
2181  %e = load float, float* %p
2182  %v1 = insertelement <4 x float> undef, float %e, i32 0
2183  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2184  ret <4 x float> %v2
2185}
2186
2187; CHECK-LABEL: load_v4f32_with_folded_offset:
2188; NO-SIMD128-NOT: v128
2189; SIMD128-NEXT: .functype load_v4f32_with_folded_offset (i32) -> (v128){{$}}
2190; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
2191; SIMD128-NEXT: return $pop[[R]]{{$}}
2192define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
2193  %q = ptrtoint <4 x float>* %p to i32
2194  %r = add nuw i32 %q, 16
2195  %s = inttoptr i32 %r to <4 x float>*
2196  %v = load <4 x float>, <4 x float>* %s
2197  ret <4 x float> %v
2198}
2199
2200; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
2201; NO-SIMD128-NOT: v128
2202; SIMD128-NEXT: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128){{$}}
2203; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
2204; SIMD128-NEXT: return $pop[[R]]{{$}}
2205define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
2206  %q = ptrtoint float* %p to i32
2207  %r = add nuw i32 %q, 16
2208  %s = inttoptr i32 %r to float*
2209  %e = load float, float* %s
2210  %v1 = insertelement <4 x float> undef, float %e, i32 0
2211  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2212  ret <4 x float> %v2
2213}
2214
2215; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
2216; NO-SIMD128-NOT: v128
2217; SIMD128-NEXT: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128){{$}}
2218; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
2219; SIMD128-NEXT: return $pop[[R]]{{$}}
2220define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
2221  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
2222  %v = load <4 x float>, <4 x float>* %s
2223  ret <4 x float> %v
2224}
2225
2226; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
2227; NO-SIMD128-NOT: v128
2228; SIMD128-NEXT: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128){{$}}
2229; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 4($0){{$}}
2230; SIMD128-NEXT: return $pop[[R]]{{$}}
2231define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
2232  %s = getelementptr inbounds float, float* %p, i32 1
2233  %e = load float, float* %s
2234  %v1 = insertelement <4 x float> undef, float %e, i32 0
2235  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2236  ret <4 x float> %v2
2237}
2238
2239; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
2240; NO-SIMD128-NOT: v128
2241; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
2242; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2243; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2244; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2245; SIMD128-NEXT: return $pop[[R]]{{$}}
2246define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
2247  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
2248  %v = load <4 x float>, <4 x float>* %s
2249  ret <4 x float> %v
2250}
2251
2252; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
2253; NO-SIMD128-NOT: v128
2254; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
2255; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -4{{$}}
2256; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2257; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2258; SIMD128-NEXT: return $pop[[R]]{{$}}
2259define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
2260  %s = getelementptr inbounds float, float* %p, i32 -1
2261  %e = load float, float* %s
2262  %v1 = insertelement <4 x float> undef, float %e, i32 0
2263  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2264  ret <4 x float> %v2
2265}
2266
2267; CHECK-LABEL: load_v4f32_with_unfolded_offset:
2268; NO-SIMD128-NOT: v128
2269; SIMD128-NEXT: .functype load_v4f32_with_unfolded_offset (i32) -> (v128){{$}}
2270; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2271; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2272; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2273; SIMD128-NEXT: return $pop[[R]]{{$}}
2274define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
2275  %q = ptrtoint <4 x float>* %p to i32
2276  %r = add nsw i32 %q, 16
2277  %s = inttoptr i32 %r to <4 x float>*
2278  %v = load <4 x float>, <4 x float>* %s
2279  ret <4 x float> %v
2280}
2281
2282; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
2283; NO-SIMD128-NOT: v128
2284; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128){{$}}
2285; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2286; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2287; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2288; SIMD128-NEXT: return $pop[[R]]{{$}}
2289define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
2290  %q = ptrtoint float* %p to i32
2291  %r = add nsw i32 %q, 16
2292  %s = inttoptr i32 %r to float*
2293  %e = load float, float* %s
2294  %v1 = insertelement <4 x float> undef, float %e, i32 0
2295  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2296  ret <4 x float> %v2
2297}
2298
2299; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
2300; NO-SIMD128-NOT: v128
2301; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}}
2302; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2303; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2304; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2305; SIMD128-NEXT: return $pop[[R]]{{$}}
2306define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
2307  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
2308  %v = load <4 x float>, <4 x float>* %s
2309  ret <4 x float> %v
2310}
2311
2312; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
2313; NO-SIMD128-NOT: v128
2314; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}}
2315; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 4{{$}}
2316; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2317; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2318; SIMD128-NEXT: return $pop[[R]]{{$}}
2319define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
2320  %s = getelementptr float, float* %p, i32 1
2321  %e = load float, float* %s
2322  %v1 = insertelement <4 x float> undef, float %e, i32 0
2323  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2324  ret <4 x float> %v2
2325}
2326
2327; CHECK-LABEL: load_v4f32_from_numeric_address:
2328; NO-SIMD128-NOT: v128
2329; SIMD128-NEXT: .functype load_v4f32_from_numeric_address () -> (v128){{$}}
2330; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2331; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2332; SIMD128-NEXT: return $pop[[R]]{{$}}
2333define <4 x float> @load_v4f32_from_numeric_address() {
2334  %s = inttoptr i32 32 to <4 x float>*
2335  %v = load <4 x float>, <4 x float>* %s
2336  ret <4 x float> %v
2337}
2338
2339; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
2340; NO-SIMD128-NOT: v128
2341; SIMD128-NEXT: .functype load_splat_v4f32_from_numeric_address () -> (v128){{$}}
2342; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2343; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2344; SIMD128-NEXT: return $pop[[R]]{{$}}
2345define <4 x float> @load_splat_v4f32_from_numeric_address() {
2346  %s = inttoptr i32 32 to float*
2347  %e = load float, float* %s
2348  %v1 = insertelement <4 x float> undef, float %e, i32 0
2349  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2350  ret <4 x float> %v2
2351}
2352
2353; CHECK-LABEL: load_v4f32_from_global_address:
2354; NO-SIMD128-NOT: v128
2355; SIMD128-NEXT: .functype load_v4f32_from_global_address () -> (v128){{$}}
2356; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2357; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4f32($pop[[L0]]){{$}}
2358; SIMD128-NEXT: return $pop[[R]]{{$}}
2359@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
2360define <4 x float> @load_v4f32_from_global_address() {
2361  %v = load <4 x float>, <4 x float>* @gv_v4f32
2362  ret <4 x float> %v
2363}
2364
2365; CHECK-LABEL: load_splat_v4f32_from_global_address:
2366; NO-SIMD128-NOT: v128
2367; SIMD128-NEXT: .functype load_splat_v4f32_from_global_address () -> (v128){{$}}
2368; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2369; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, gv_f32($pop[[L0]]){{$}}
2370; SIMD128-NEXT: return $pop[[R]]{{$}}
2371@gv_f32 = global float 42.
2372define <4 x float> @load_splat_v4f32_from_global_address() {
2373  %e = load float, float* @gv_f32
2374  %v1 = insertelement <4 x float> undef, float %e, i32 0
2375  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2376  ret <4 x float> %v2
2377}
2378
2379; CHECK-LABEL: store_v4f32:
2380; NO-SIMD128-NOT: v128
2381; SIMD128-NEXT: .functype store_v4f32 (v128, i32) -> (){{$}}
2382; SIMD128-NEXT: v128.store 0($1), $0{{$}}
2383define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
2384  store <4 x float> %v , <4 x float>* %p
2385  ret void
2386}
2387
2388; CHECK-LABEL: store_v4f32_with_folded_offset:
2389; NO-SIMD128-NOT: v128
2390; SIMD128-NEXT: .functype store_v4f32_with_folded_offset (v128, i32) -> (){{$}}
2391; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2392define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
2393  %q = ptrtoint <4 x float>* %p to i32
2394  %r = add nuw i32 %q, 16
2395  %s = inttoptr i32 %r to <4 x float>*
2396  store <4 x float> %v , <4 x float>* %s
2397  ret void
2398}
2399
2400; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
2401; NO-SIMD128-NOT: v128
2402; SIMD128-NEXT: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> (){{$}}
2403; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2404define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
2405  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
2406  store <4 x float> %v , <4 x float>* %s
2407  ret void
2408}
2409
2410; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
2411; NO-SIMD128-NOT: v128
2412; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
2413; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2414; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2415; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2416define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
2417  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
2418  store <4 x float> %v , <4 x float>* %s
2419  ret void
2420}
2421
2422; CHECK-LABEL: store_v4f32_with_unfolded_offset:
2423; NO-SIMD128-NOT: v128
2424; SIMD128-NEXT: .functype store_v4f32_with_unfolded_offset (v128, i32) -> (){{$}}
2425; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2426; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2427; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2428define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
2429  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
2430  store <4 x float> %v , <4 x float>* %s
2431  ret void
2432}
2433
2434; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
2435; NO-SIMD128-NOT: v128
2436; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> (){{$}}
2437; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2438; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2439; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2440define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
2441  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
2442  store <4 x float> %v , <4 x float>* %s
2443  ret void
2444}
2445
2446; CHECK-LABEL: store_v4f32_to_numeric_address:
2447; NO-SIMD128-NOT: v128
2448; SIMD128-NEXT: .functype store_v4f32_to_numeric_address (v128) -> (){{$}}
2449; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2450; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
2451define void @store_v4f32_to_numeric_address(<4 x float> %v) {
2452  %s = inttoptr i32 32 to <4 x float>*
2453  store <4 x float> %v , <4 x float>* %s
2454  ret void
2455}
2456
2457; CHECK-LABEL: store_v4f32_to_global_address:
2458; NO-SIMD128-NOT: v128
2459; SIMD128-NEXT: .functype store_v4f32_to_global_address (v128) -> (){{$}}
2460; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
2461; SIMD128-NEXT: v128.store gv_v4f32($pop[[R]]), $0{{$}}
2462define void @store_v4f32_to_global_address(<4 x float> %v) {
2463  store <4 x float> %v , <4 x float>* @gv_v4f32
2464  ret void
2465}
2466
2467; ==============================================================================
2468; 2 x double
2469; ==============================================================================
2470; CHECK-LABEL: load_v2f64:
2471; NO-SIMD128-NOT: v128
2472; SIMD128-NEXT: .functype load_v2f64 (i32) -> (v128){{$}}
2473; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
2474; SIMD128-NEXT: return $pop[[R]]{{$}}
2475define <2 x double> @load_v2f64(<2 x double>* %p) {
2476  %v = load <2 x double>, <2 x double>* %p
2477  ret <2 x double> %v
2478}
2479
2480; CHECK-LABEL: load_splat_v2f64:
2481; NO-SIMD128-NOT: v128
2482; SIMD128-NEXT: .functype load_splat_v2f64 (i32) -> (v128){{$}}
2483; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
2484; SIMD128-NEXT: return $pop[[R]]{{$}}
2485define <2 x double> @load_splat_v2f64(double* %p) {
2486  %e = load double, double* %p
2487  %v1 = insertelement <2 x double> undef, double %e, i32 0
2488  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2489  ret <2 x double> %v2
2490}
2491
2492; CHECK-LABEL: load_v2f64_with_folded_offset:
2493; NO-SIMD128-NOT: v128
2494; SIMD128-NEXT: .functype load_v2f64_with_folded_offset (i32) -> (v128){{$}}
2495; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
2496; SIMD128-NEXT: return $pop[[R]]{{$}}
2497define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
2498  %q = ptrtoint <2 x double>* %p to i32
2499  %r = add nuw i32 %q, 16
2500  %s = inttoptr i32 %r to <2 x double>*
2501  %v = load <2 x double>, <2 x double>* %s
2502  ret <2 x double> %v
2503}
2504
2505; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
2506; NO-SIMD128-NOT: v128
2507; SIMD128-NEXT: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128){{$}}
2508; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
2509; SIMD128-NEXT: return $pop[[R]]{{$}}
2510define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
2511  %q = ptrtoint double* %p to i32
2512  %r = add nuw i32 %q, 16
2513  %s = inttoptr i32 %r to double*
2514  %e = load double, double* %s
2515  %v1 = insertelement <2 x double> undef, double %e, i32 0
2516  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2517  ret <2 x double> %v2
2518}
2519
2520; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
2521; NO-SIMD128-NOT: v128
2522; SIMD128-NEXT: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128){{$}}
2523; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
2524; SIMD128-NEXT: return $pop[[R]]{{$}}
2525define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
2526  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
2527  %v = load <2 x double>, <2 x double>* %s
2528  ret <2 x double> %v
2529}
2530
2531; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
2532; NO-SIMD128-NOT: v128
2533; SIMD128-NEXT: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128){{$}}
2534; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 8($0){{$}}
2535; SIMD128-NEXT: return $pop[[R]]{{$}}
2536define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
2537  %s = getelementptr inbounds double, double* %p, i32 1
2538  %e = load double, double* %s
2539  %v1 = insertelement <2 x double> undef, double %e, i32 0
2540  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2541  ret <2 x double> %v2
2542}
2543
2544; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
2545; NO-SIMD128-NOT: v128
2546; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
2547; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2548; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2549; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2550; SIMD128-NEXT: return $pop[[R]]{{$}}
2551define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
2552  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
2553  %v = load <2 x double>, <2 x double>* %s
2554  ret <2 x double> %v
2555}
2556
2557; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
2558; NO-SIMD128-NOT: v128
2559; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
2560; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
2561; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2562; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2563; SIMD128-NEXT: return $pop[[R]]{{$}}
2564define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
2565  %s = getelementptr inbounds double, double* %p, i32 -1
2566  %e = load double, double* %s
2567  %v1 = insertelement <2 x double> undef, double %e, i32 0
2568  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2569  ret <2 x double> %v2
2570}
2571
2572; CHECK-LABEL: load_v2f64_with_unfolded_offset:
2573; NO-SIMD128-NOT: v128
2574; SIMD128-NEXT: .functype load_v2f64_with_unfolded_offset (i32) -> (v128){{$}}
2575; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2576; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2577; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2578; SIMD128-NEXT: return $pop[[R]]{{$}}
2579define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
2580  %q = ptrtoint <2 x double>* %p to i32
2581  %r = add nsw i32 %q, 16
2582  %s = inttoptr i32 %r to <2 x double>*
2583  %v = load <2 x double>, <2 x double>* %s
2584  ret <2 x double> %v
2585}
2586
2587; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
2588; NO-SIMD128-NOT: v128
2589; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128){{$}}
2590; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2591; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2592; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2593; SIMD128-NEXT: return $pop[[R]]{{$}}
2594define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
2595  %q = ptrtoint double* %p to i32
2596  %r = add nsw i32 %q, 16
2597  %s = inttoptr i32 %r to double*
2598  %e = load double, double* %s
2599  %v1 = insertelement <2 x double> undef, double %e, i32 0
2600  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2601  ret <2 x double> %v2
2602}
2603
2604; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
2605; NO-SIMD128-NOT: v128
2606; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}}
2607; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2608; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2609; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2610; SIMD128-NEXT: return $pop[[R]]{{$}}
2611define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
2612  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
2613  %v = load <2 x double>, <2 x double>* %s
2614  ret <2 x double> %v
2615}
2616
2617; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
2618; NO-SIMD128-NOT: v128
2619; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}}
2620; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
2621; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2622; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2623; SIMD128-NEXT: return $pop[[R]]{{$}}
2624define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
2625  %s = getelementptr double, double* %p, i32 1
2626  %e = load double, double* %s
2627  %v1 = insertelement <2 x double> undef, double %e, i32 0
2628  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2629  ret <2 x double> %v2
2630}
2631
2632; CHECK-LABEL: load_v2f64_from_numeric_address:
2633; NO-SIMD128-NOT: v128
2634; SIMD128-NEXT: .functype load_v2f64_from_numeric_address () -> (v128){{$}}
2635; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2636; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2637; SIMD128-NEXT: return $pop[[R]]{{$}}
2638define <2 x double> @load_v2f64_from_numeric_address() {
2639  %s = inttoptr i32 32 to <2 x double>*
2640  %v = load <2 x double>, <2 x double>* %s
2641  ret <2 x double> %v
2642}
2643
2644; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
2645; NO-SIMD128-NOT: v128
2646; SIMD128-NEXT: .functype load_splat_v2f64_from_numeric_address () -> (v128){{$}}
2647; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2648; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2649; SIMD128-NEXT: return $pop[[R]]{{$}}
2650define <2 x double> @load_splat_v2f64_from_numeric_address() {
2651  %s = inttoptr i32 32 to double*
2652  %e = load double, double* %s
2653  %v1 = insertelement <2 x double> undef, double %e, i32 0
2654  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2655  ret <2 x double> %v2
2656}
2657
2658; CHECK-LABEL: load_v2f64_from_global_address:
2659; NO-SIMD128-NOT: v128
2660; SIMD128-NEXT: .functype load_v2f64_from_global_address () -> (v128){{$}}
2661; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2662; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2f64($pop[[L0]]){{$}}
2663; SIMD128-NEXT: return $pop[[R]]{{$}}
2664@gv_v2f64 = global <2 x double> <double 42., double 42.>
2665define <2 x double> @load_v2f64_from_global_address() {
2666  %v = load <2 x double>, <2 x double>* @gv_v2f64
2667  ret <2 x double> %v
2668}
2669
2670; CHECK-LABEL: load_splat_v2f64_from_global_address:
2671; NO-SIMD128-NOT: v128
2672; SIMD128-NEXT: .functype load_splat_v2f64_from_global_address () -> (v128){{$}}
2673; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2674; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, gv_f64($pop[[L0]]){{$}}
2675; SIMD128-NEXT: return $pop[[R]]{{$}}
2676@gv_f64 = global double 42.
2677define <2 x double> @load_splat_v2f64_from_global_address() {
2678  %e = load double, double* @gv_f64
2679  %v1 = insertelement <2 x double> undef, double %e, i32 0
2680  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2681  ret <2 x double> %v2
2682}
2683
2684; CHECK-LABEL: store_v2f64:
2685; NO-SIMD128-NOT: v128
2686; SIMD128-NEXT: .functype store_v2f64 (v128, i32) -> (){{$}}
2687; SIMD128-NEXT: v128.store 0($1), $0{{$}}
2688define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
2689  store <2 x double> %v , <2 x double>* %p
2690  ret void
2691}
2692
2693; CHECK-LABEL: store_v2f64_with_folded_offset:
2694; NO-SIMD128-NOT: v128
2695; SIMD128-NEXT: .functype store_v2f64_with_folded_offset (v128, i32) -> (){{$}}
2696; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2697define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
2698  %q = ptrtoint <2 x double>* %p to i32
2699  %r = add nuw i32 %q, 16
2700  %s = inttoptr i32 %r to <2 x double>*
2701  store <2 x double> %v , <2 x double>* %s
2702  ret void
2703}
2704
2705; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
2706; NO-SIMD128-NOT: v128
2707; SIMD128-NEXT: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> (){{$}}
2708; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2709define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
2710  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
2711  store <2 x double> %v , <2 x double>* %s
2712  ret void
2713}
2714
2715; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
2716; NO-SIMD128-NOT: v128
2717; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
2718; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2719; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2720; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2721define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
2722  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
2723  store <2 x double> %v , <2 x double>* %s
2724  ret void
2725}
2726
2727; CHECK-LABEL: store_v2f64_with_unfolded_offset:
2728; NO-SIMD128-NOT: v128
2729; SIMD128-NEXT: .functype store_v2f64_with_unfolded_offset (v128, i32) -> (){{$}}
2730; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2731; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2732; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2733define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
2734  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
2735  store <2 x double> %v , <2 x double>* %s
2736  ret void
2737}
2738
2739; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
2740; NO-SIMD128-NOT: v128
2741; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> (){{$}}
2742; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2743; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2744; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2745define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
2746  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
2747  store <2 x double> %v , <2 x double>* %s
2748  ret void
2749}
2750
2751; CHECK-LABEL: store_v2f64_to_numeric_address:
2752; NO-SIMD128-NOT: v128
2753; SIMD128-NEXT: .functype store_v2f64_to_numeric_address (v128) -> (){{$}}
2754; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2755; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
2756define void @store_v2f64_to_numeric_address(<2 x double> %v) {
2757  %s = inttoptr i32 32 to <2 x double>*
2758  store <2 x double> %v , <2 x double>* %s
2759  ret void
2760}
2761
2762; CHECK-LABEL: store_v2f64_to_global_address:
2763; NO-SIMD128-NOT: v128
2764; SIMD128-NEXT: .functype store_v2f64_to_global_address (v128) -> (){{$}}
2765; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
2766; SIMD128-NEXT: v128.store gv_v2f64($pop[[R]]), $0{{$}}
2767define void @store_v2f64_to_global_address(<2 x double> %v) {
2768  store <2 x double> %v , <2 x double>* @gv_v2f64
2769  ret void
2770}
2771