1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
3
4; Test SIMD loads and stores
5
6target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
7target triple = "wasm32-unknown-unknown"
8
9; ==============================================================================
10; 16 x i8
11; ==============================================================================
; Baseline case: a plain v128 load through the incoming pointer, so the
; memarg offset immediate is 0 and the pointer is used unmodified.
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %v
}
22
; A scalar load followed by the insertelement-into-undef + zero-mask
; shufflevector splat idiom is pattern-matched to a single
; v8x16.load_splat instruction.
define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
35
; Because the address add carries `nuw`, the +16 is known not to wrap the
; unsigned address space, so the backend folds it into the load's
; (unsigned) memarg offset immediate: `v128.load 16` with no i32.add.
define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}
49
50define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
51; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
52; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
53; CHECK-NEXT:  # %bb.0:
54; CHECK-NEXT:    local.get 0
55; CHECK-NEXT:    v8x16.load_splat 16
56; CHECK-NEXT:    # fallthrough-return
57  %q = ptrtoint i8* %p to i32
58  %r = add nuw i32 %q, 16
59  %s = inttoptr i32 %r to i8*
60  %e = load i8, i8* %s
61  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
62  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
63  ret <16 x i8> %v2
64}
65
66define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
67; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
68; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
69; CHECK-NEXT:  # %bb.0:
70; CHECK-NEXT:    local.get 0
71; CHECK-NEXT:    v128.load 16
72; CHECK-NEXT:    # fallthrough-return
73  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
74  %v = load <16 x i8>, <16 x i8>* %s
75  ret <16 x i8> %v
76}
77
78define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
79; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
80; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
81; CHECK-NEXT:  # %bb.0:
82; CHECK-NEXT:    local.get 0
83; CHECK-NEXT:    v8x16.load_splat 1
84; CHECK-NEXT:    # fallthrough-return
85  %s = getelementptr inbounds i8, i8* %p, i32 1
86  %e = load i8, i8* %s
87  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
88  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
89  ret <16 x i8> %v2
90}
91
92define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
93; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
94; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
95; CHECK-NEXT:  # %bb.0:
96; CHECK-NEXT:    local.get 0
97; CHECK-NEXT:    i32.const -16
98; CHECK-NEXT:    i32.add
99; CHECK-NEXT:    v128.load 0
100; CHECK-NEXT:    # fallthrough-return
101  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
102  %v = load <16 x i8>, <16 x i8>* %s
103  ret <16 x i8> %v
104}
105
106define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
107; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
108; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
109; CHECK-NEXT:  # %bb.0:
110; CHECK-NEXT:    local.get 0
111; CHECK-NEXT:    i32.const -1
112; CHECK-NEXT:    i32.add
113; CHECK-NEXT:    v8x16.load_splat 0
114; CHECK-NEXT:    # fallthrough-return
115  %s = getelementptr inbounds i8, i8* %p, i32 -1
116  %e = load i8, i8* %s
117  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
118  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
119  ret <16 x i8> %v2
120}
121
; Here the add is only `nsw` (not `nuw`), so the +16 may wrap the unsigned
; address computation and cannot be folded into the load's unsigned memarg
; offset; an explicit i32.add is emitted and the load keeps offset 0.
; Contrast with load_v16i8_with_folded_offset above.
define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}
137
138define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
139; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
140; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
141; CHECK-NEXT:  # %bb.0:
142; CHECK-NEXT:    local.get 0
143; CHECK-NEXT:    i32.const 16
144; CHECK-NEXT:    i32.add
145; CHECK-NEXT:    v8x16.load_splat 0
146; CHECK-NEXT:    # fallthrough-return
147  %q = ptrtoint i8* %p to i32
148  %r = add nsw i32 %q, 16
149  %s = inttoptr i32 %r to i8*
150  %e = load i8, i8* %s
151  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
152  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
153  ret <16 x i8> %v2
154}
155
156define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
157; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
158; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
159; CHECK-NEXT:  # %bb.0:
160; CHECK-NEXT:    local.get 0
161; CHECK-NEXT:    i32.const 16
162; CHECK-NEXT:    i32.add
163; CHECK-NEXT:    v128.load 0
164; CHECK-NEXT:    # fallthrough-return
165  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
166  %v = load <16 x i8>, <16 x i8>* %s
167  ret <16 x i8> %v
168}
169
170define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
171; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
172; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
173; CHECK-NEXT:  # %bb.0:
174; CHECK-NEXT:    local.get 0
175; CHECK-NEXT:    i32.const 1
176; CHECK-NEXT:    i32.add
177; CHECK-NEXT:    v8x16.load_splat 0
178; CHECK-NEXT:    # fallthrough-return
179  %s = getelementptr i8, i8* %p, i32 1
180  %e = load i8, i8* %s
181  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
182  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
183  ret <16 x i8> %v2
184}
185
186define <16 x i8> @load_v16i8_from_numeric_address() {
187; CHECK-LABEL: load_v16i8_from_numeric_address:
188; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
189; CHECK-NEXT:  # %bb.0:
190; CHECK-NEXT:    i32.const 0
191; CHECK-NEXT:    v128.load 32
192; CHECK-NEXT:    # fallthrough-return
193  %s = inttoptr i32 32 to <16 x i8>*
194  %v = load <16 x i8>, <16 x i8>* %s
195  ret <16 x i8> %v
196}
197
198define <16 x i8> @load_splat_v16i8_from_numeric_address() {
199; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
200; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
201; CHECK-NEXT:  # %bb.0:
202; CHECK-NEXT:    i32.const 0
203; CHECK-NEXT:    v8x16.load_splat 32
204; CHECK-NEXT:    # fallthrough-return
205  %s = inttoptr i32 32 to i8*
206  %e = load i8, i8* %s
207  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
208  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
209  ret <16 x i8> %v2
210}
211
212@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
213define <16 x i8> @load_v16i8_from_global_address() {
214; CHECK-LABEL: load_v16i8_from_global_address:
215; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
216; CHECK-NEXT:  # %bb.0:
217; CHECK-NEXT:    i32.const 0
218; CHECK-NEXT:    v128.load gv_v16i8
219; CHECK-NEXT:    # fallthrough-return
220  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
221  ret <16 x i8> %v
222}
223
224@gv_i8 = global i8 42
225define <16 x i8> @load_splat_v16i8_from_global_address() {
226; CHECK-LABEL: load_splat_v16i8_from_global_address:
227; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
228; CHECK-NEXT:  # %bb.0:
229; CHECK-NEXT:    i32.const 0
230; CHECK-NEXT:    v8x16.load_splat gv_i8
231; CHECK-NEXT:    # fallthrough-return
232  %e = load i8, i8* @gv_i8
233  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
234  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
235  ret <16 x i8> %v2
236}
237
; Baseline store: address (local 1) then value (local 0) are pushed and a
; v128.store with offset immediate 0 is emitted.
define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* %p
  ret void
}
249
250define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
251; CHECK-LABEL: store_v16i8_with_folded_offset:
252; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
253; CHECK-NEXT:  # %bb.0:
254; CHECK-NEXT:    local.get 1
255; CHECK-NEXT:    local.get 0
256; CHECK-NEXT:    v128.store 16
257; CHECK-NEXT:    # fallthrough-return
258  %q = ptrtoint <16 x i8>* %p to i32
259  %r = add nuw i32 %q, 16
260  %s = inttoptr i32 %r to <16 x i8>*
261  store <16 x i8> %v , <16 x i8>* %s
262  ret void
263}
264
265define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
266; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
267; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
268; CHECK-NEXT:  # %bb.0:
269; CHECK-NEXT:    local.get 1
270; CHECK-NEXT:    local.get 0
271; CHECK-NEXT:    v128.store 16
272; CHECK-NEXT:    # fallthrough-return
273  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
274  store <16 x i8> %v , <16 x i8>* %s
275  ret void
276}
277
278define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
279; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
280; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
281; CHECK-NEXT:  # %bb.0:
282; CHECK-NEXT:    local.get 1
283; CHECK-NEXT:    i32.const -16
284; CHECK-NEXT:    i32.add
285; CHECK-NEXT:    local.get 0
286; CHECK-NEXT:    v128.store 0
287; CHECK-NEXT:    # fallthrough-return
288  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
289  store <16 x i8> %v , <16 x i8>* %s
290  ret void
291}
292
; An `nsw`-only (not `nuw`) add of +16 cannot be folded into the store's
; unsigned memarg offset, so an explicit i32.add must remain. This mirrors
; load_v16i8_with_unfolded_offset. (The previous body was a copy-paste
; duplicate of store_v16i8_with_unfolded_gep_negative_offset and did not
; actually test an unfolded positive offset.)
define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}
307
308define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
309; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
310; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
311; CHECK-NEXT:  # %bb.0:
312; CHECK-NEXT:    local.get 1
313; CHECK-NEXT:    i32.const 16
314; CHECK-NEXT:    i32.add
315; CHECK-NEXT:    local.get 0
316; CHECK-NEXT:    v128.store 0
317; CHECK-NEXT:    # fallthrough-return
318  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
319  store <16 x i8> %v , <16 x i8>* %s
320  ret void
321}
322
323define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
324; CHECK-LABEL: store_v16i8_to_numeric_address:
325; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
326; CHECK-NEXT:  # %bb.0:
327; CHECK-NEXT:    i32.const 0
328; CHECK-NEXT:    local.get 0
329; CHECK-NEXT:    v128.store 32
330; CHECK-NEXT:    # fallthrough-return
331  %s = inttoptr i32 32 to <16 x i8>*
332  store <16 x i8> %v , <16 x i8>* %s
333  ret void
334}
335
336define void @store_v16i8_to_global_address(<16 x i8> %v) {
337; CHECK-LABEL: store_v16i8_to_global_address:
338; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
339; CHECK-NEXT:  # %bb.0:
340; CHECK-NEXT:    i32.const 0
341; CHECK-NEXT:    local.get 0
342; CHECK-NEXT:    v128.store gv_v16i8
343; CHECK-NEXT:    # fallthrough-return
344  store <16 x i8> %v , <16 x i8>* @gv_v16i8
345  ret void
346}
347
348; ==============================================================================
349; 8 x i16
350; ==============================================================================
351define <8 x i16> @load_v8i16(<8 x i16>* %p) {
352; CHECK-LABEL: load_v8i16:
353; CHECK:         .functype load_v8i16 (i32) -> (v128)
354; CHECK-NEXT:  # %bb.0:
355; CHECK-NEXT:    local.get 0
356; CHECK-NEXT:    v128.load 0
357; CHECK-NEXT:    # fallthrough-return
358  %v = load <8 x i16>, <8 x i16>* %p
359  ret <8 x i16> %v
360}
361
362define <8 x i16> @load_splat_v8i16(i16* %p) {
363; CHECK-LABEL: load_splat_v8i16:
364; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
365; CHECK-NEXT:  # %bb.0:
366; CHECK-NEXT:    local.get 0
367; CHECK-NEXT:    v16x8.load_splat 0
368; CHECK-NEXT:    # fallthrough-return
369  %e = load i16, i16* %p
370  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
371  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
372  ret <8 x i16> %v2
373}
374
375define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
376; CHECK-LABEL: load_sext_v8i16:
377; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
378; CHECK-NEXT:  # %bb.0:
379; CHECK-NEXT:    local.get 0
380; CHECK-NEXT:    i16x8.load8x8_s 0
381; CHECK-NEXT:    # fallthrough-return
382  %v = load <8 x i8>, <8 x i8>* %p
383  %v2 = sext <8 x i8> %v to <8 x i16>
384  ret <8 x i16> %v2
385}
386
387define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
388; CHECK-LABEL: load_zext_v8i16:
389; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
390; CHECK-NEXT:  # %bb.0:
391; CHECK-NEXT:    local.get 0
392; CHECK-NEXT:    i16x8.load8x8_u 0
393; CHECK-NEXT:    # fallthrough-return
394  %v = load <8 x i8>, <8 x i8>* %p
395  %v2 = zext <8 x i8> %v to <8 x i16>
396  ret <8 x i16> %v2
397}
398
; Loading a bare <8 x i8> (no explicit extend in the IR): the illegal
; <8 x i8> result is promoted to v128, and the backend selects the
; zero-extending i16x8.load8x8_u to materialize it, as the .functype
; returning (v128) shows.
define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %v
}
409
410define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
411; CHECK-LABEL: load_v8i16_with_folded_offset:
412; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
413; CHECK-NEXT:  # %bb.0:
414; CHECK-NEXT:    local.get 0
415; CHECK-NEXT:    v128.load 16
416; CHECK-NEXT:    # fallthrough-return
417  %q = ptrtoint <8 x i16>* %p to i32
418  %r = add nuw i32 %q, 16
419  %s = inttoptr i32 %r to <8 x i16>*
420  %v = load <8 x i16>, <8 x i16>* %s
421  ret <8 x i16> %v
422}
423
424define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
425; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
426; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
427; CHECK-NEXT:  # %bb.0:
428; CHECK-NEXT:    local.get 0
429; CHECK-NEXT:    v16x8.load_splat 16
430; CHECK-NEXT:    # fallthrough-return
431  %q = ptrtoint i16* %p to i32
432  %r = add nuw i32 %q, 16
433  %s = inttoptr i32 %r to i16*
434  %e = load i16, i16* %s
435  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
436  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
437  ret <8 x i16> %v2
438}
439
440define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
441; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
442; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
443; CHECK-NEXT:  # %bb.0:
444; CHECK-NEXT:    local.get 0
445; CHECK-NEXT:    i16x8.load8x8_s 16
446; CHECK-NEXT:    # fallthrough-return
447  %q = ptrtoint <8 x i8>* %p to i32
448  %r = add nuw i32 %q, 16
449  %s = inttoptr i32 %r to <8 x i8>*
450  %v = load <8 x i8>, <8 x i8>* %s
451  %v2 = sext <8 x i8> %v to <8 x i16>
452  ret <8 x i16> %v2
453}
454
455define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
456; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
457; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
458; CHECK-NEXT:  # %bb.0:
459; CHECK-NEXT:    local.get 0
460; CHECK-NEXT:    i16x8.load8x8_u 16
461; CHECK-NEXT:    # fallthrough-return
462  %q = ptrtoint <8 x i8>* %p to i32
463  %r = add nuw i32 %q, 16
464  %s = inttoptr i32 %r to <8 x i8>*
465  %v = load <8 x i8>, <8 x i8>* %s
466  %v2 = zext <8 x i8> %v to <8 x i16>
467  ret <8 x i16> %v2
468}
469
470define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
471; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
472; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
473; CHECK-NEXT:  # %bb.0:
474; CHECK-NEXT:    local.get 0
475; CHECK-NEXT:    i16x8.load8x8_u 16
476; CHECK-NEXT:    # fallthrough-return
477  %q = ptrtoint <8 x i8>* %p to i32
478  %r = add nuw i32 %q, 16
479  %s = inttoptr i32 %r to <8 x i8>*
480  %v = load <8 x i8>, <8 x i8>* %s
481  ret <8 x i8> %v
482}
483
484define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
485; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
486; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
487; CHECK-NEXT:  # %bb.0:
488; CHECK-NEXT:    local.get 0
489; CHECK-NEXT:    v128.load 16
490; CHECK-NEXT:    # fallthrough-return
491  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
492  %v = load <8 x i16>, <8 x i16>* %s
493  ret <8 x i16> %v
494}
495
496define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
497; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
498; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
499; CHECK-NEXT:  # %bb.0:
500; CHECK-NEXT:    local.get 0
501; CHECK-NEXT:    v16x8.load_splat 2
502; CHECK-NEXT:    # fallthrough-return
503  %s = getelementptr inbounds i16, i16* %p, i32 1
504  %e = load i16, i16* %s
505  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
506  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
507  ret <8 x i16> %v2
508}
509
510define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
511; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
512; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
513; CHECK-NEXT:  # %bb.0:
514; CHECK-NEXT:    local.get 0
515; CHECK-NEXT:    i16x8.load8x8_s 8
516; CHECK-NEXT:    # fallthrough-return
517  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
518  %v = load <8 x i8>, <8 x i8>* %s
519  %v2 = sext <8 x i8> %v to <8 x i16>
520  ret <8 x i16> %v2
521}
522
523define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
524; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
525; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
526; CHECK-NEXT:  # %bb.0:
527; CHECK-NEXT:    local.get 0
528; CHECK-NEXT:    i16x8.load8x8_u 8
529; CHECK-NEXT:    # fallthrough-return
530  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
531  %v = load <8 x i8>, <8 x i8>* %s
532  %v2 = zext <8 x i8> %v to <8 x i16>
533  ret <8 x i16> %v2
534}
535
536define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
537; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
538; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
539; CHECK-NEXT:  # %bb.0:
540; CHECK-NEXT:    local.get 0
541; CHECK-NEXT:    i16x8.load8x8_u 8
542; CHECK-NEXT:    # fallthrough-return
543  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
544  %v = load <8 x i8>, <8 x i8>* %s
545  ret <8 x i8> %v
546}
547
548define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
549; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
550; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
551; CHECK-NEXT:  # %bb.0:
552; CHECK-NEXT:    local.get 0
553; CHECK-NEXT:    i32.const -16
554; CHECK-NEXT:    i32.add
555; CHECK-NEXT:    v128.load 0
556; CHECK-NEXT:    # fallthrough-return
557  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
558  %v = load <8 x i16>, <8 x i16>* %s
559  ret <8 x i16> %v
560}
561
562define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
563; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
564; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
565; CHECK-NEXT:  # %bb.0:
566; CHECK-NEXT:    local.get 0
567; CHECK-NEXT:    i32.const -2
568; CHECK-NEXT:    i32.add
569; CHECK-NEXT:    v16x8.load_splat 0
570; CHECK-NEXT:    # fallthrough-return
571  %s = getelementptr inbounds i16, i16* %p, i32 -1
572  %e = load i16, i16* %s
573  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
574  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
575  ret <8 x i16> %v2
576}
577
578define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
579; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
580; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
581; CHECK-NEXT:  # %bb.0:
582; CHECK-NEXT:    local.get 0
583; CHECK-NEXT:    i32.const -8
584; CHECK-NEXT:    i32.add
585; CHECK-NEXT:    i16x8.load8x8_s 0
586; CHECK-NEXT:    # fallthrough-return
587  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
588  %v = load <8 x i8>, <8 x i8>* %s
589  %v2 = sext <8 x i8> %v to <8 x i16>
590  ret <8 x i16> %v2
591}
592
593define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
594; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
595; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
596; CHECK-NEXT:  # %bb.0:
597; CHECK-NEXT:    local.get 0
598; CHECK-NEXT:    i32.const -8
599; CHECK-NEXT:    i32.add
600; CHECK-NEXT:    i16x8.load8x8_u 0
601; CHECK-NEXT:    # fallthrough-return
602  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
603  %v = load <8 x i8>, <8 x i8>* %s
604  %v2 = zext <8 x i8> %v to <8 x i16>
605  ret <8 x i16> %v2
606}
607
608define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
609; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
610; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
611; CHECK-NEXT:  # %bb.0:
612; CHECK-NEXT:    local.get 0
613; CHECK-NEXT:    i32.const -8
614; CHECK-NEXT:    i32.add
615; CHECK-NEXT:    i16x8.load8x8_u 0
616; CHECK-NEXT:    # fallthrough-return
617  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
618  %v = load <8 x i8>, <8 x i8>* %s
619  ret <8 x i8> %v
620}
621
622define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
623; CHECK-LABEL: load_v8i16_with_unfolded_offset:
624; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
625; CHECK-NEXT:  # %bb.0:
626; CHECK-NEXT:    local.get 0
627; CHECK-NEXT:    i32.const 16
628; CHECK-NEXT:    i32.add
629; CHECK-NEXT:    v128.load 0
630; CHECK-NEXT:    # fallthrough-return
631  %q = ptrtoint <8 x i16>* %p to i32
632  %r = add nsw i32 %q, 16
633  %s = inttoptr i32 %r to <8 x i16>*
634  %v = load <8 x i16>, <8 x i16>* %s
635  ret <8 x i16> %v
636}
637
638define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
639; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
640; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
641; CHECK-NEXT:  # %bb.0:
642; CHECK-NEXT:    local.get 0
643; CHECK-NEXT:    i32.const 16
644; CHECK-NEXT:    i32.add
645; CHECK-NEXT:    v16x8.load_splat 0
646; CHECK-NEXT:    # fallthrough-return
647  %q = ptrtoint i16* %p to i32
648  %r = add nsw i32 %q, 16
649  %s = inttoptr i32 %r to i16*
650  %e = load i16, i16* %s
651  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
652  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
653  ret <8 x i16> %v2
654}
655
656define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
657; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
658; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
659; CHECK-NEXT:  # %bb.0:
660; CHECK-NEXT:    local.get 0
661; CHECK-NEXT:    i32.const 16
662; CHECK-NEXT:    i32.add
663; CHECK-NEXT:    i16x8.load8x8_s 0
664; CHECK-NEXT:    # fallthrough-return
665  %q = ptrtoint <8 x i8>* %p to i32
666  %r = add nsw i32 %q, 16
667  %s = inttoptr i32 %r to <8 x i8>*
668  %v = load <8 x i8>, <8 x i8>* %s
669  %v2 = sext <8 x i8> %v to <8 x i16>
670  ret <8 x i16> %v2
671}
672
673define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
674; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
675; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
676; CHECK-NEXT:  # %bb.0:
677; CHECK-NEXT:    local.get 0
678; CHECK-NEXT:    i32.const 16
679; CHECK-NEXT:    i32.add
680; CHECK-NEXT:    i16x8.load8x8_u 0
681; CHECK-NEXT:    # fallthrough-return
682  %q = ptrtoint <8 x i8>* %p to i32
683  %r = add nsw i32 %q, 16
684  %s = inttoptr i32 %r to <8 x i8>*
685  %v = load <8 x i8>, <8 x i8>* %s
686  %v2 = zext <8 x i8> %v to <8 x i16>
687  ret <8 x i16> %v2
688}
689
690define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
691; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
692; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
693; CHECK-NEXT:  # %bb.0:
694; CHECK-NEXT:    local.get 0
695; CHECK-NEXT:    i32.const 16
696; CHECK-NEXT:    i32.add
697; CHECK-NEXT:    i16x8.load8x8_u 0
698; CHECK-NEXT:    # fallthrough-return
699  %q = ptrtoint <8 x i8>* %p to i32
700  %r = add nsw i32 %q, 16
701  %s = inttoptr i32 %r to <8 x i8>*
702  %v = load <8 x i8>, <8 x i8>* %s
703  ret <8 x i8> %v
704}
705
706define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
707; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
708; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
709; CHECK-NEXT:  # %bb.0:
710; CHECK-NEXT:    local.get 0
711; CHECK-NEXT:    i32.const 16
712; CHECK-NEXT:    i32.add
713; CHECK-NEXT:    v128.load 0
714; CHECK-NEXT:    # fallthrough-return
715  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
716  %v = load <8 x i16>, <8 x i16>* %s
717  ret <8 x i16> %v
718}
719
720define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
721; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
722; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
723; CHECK-NEXT:  # %bb.0:
724; CHECK-NEXT:    local.get 0
725; CHECK-NEXT:    i32.const 2
726; CHECK-NEXT:    i32.add
727; CHECK-NEXT:    v16x8.load_splat 0
728; CHECK-NEXT:    # fallthrough-return
729  %s = getelementptr i16, i16* %p, i32 1
730  %e = load i16, i16* %s
731  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
732  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
733  ret <8 x i16> %v2
734}
735
736define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
737; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
738; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
739; CHECK-NEXT:  # %bb.0:
740; CHECK-NEXT:    local.get 0
741; CHECK-NEXT:    i32.const 8
742; CHECK-NEXT:    i32.add
743; CHECK-NEXT:    i16x8.load8x8_s 0
744; CHECK-NEXT:    # fallthrough-return
745  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
746  %v = load <8 x i8>, <8 x i8>* %s
747  %v2 = sext <8 x i8> %v to <8 x i16>
748  ret <8 x i16> %v2
749}
750
751define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
752; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
753; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
754; CHECK-NEXT:  # %bb.0:
755; CHECK-NEXT:    local.get 0
756; CHECK-NEXT:    i32.const 8
757; CHECK-NEXT:    i32.add
758; CHECK-NEXT:    i16x8.load8x8_u 0
759; CHECK-NEXT:    # fallthrough-return
760  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
761  %v = load <8 x i8>, <8 x i8>* %s
762  %v2 = zext <8 x i8> %v to <8 x i16>
763  ret <8 x i16> %v2
764}
765
766define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
767; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
768; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
769; CHECK-NEXT:  # %bb.0:
770; CHECK-NEXT:    local.get 0
771; CHECK-NEXT:    i32.const 8
772; CHECK-NEXT:    i32.add
773; CHECK-NEXT:    i16x8.load8x8_u 0
774; CHECK-NEXT:    # fallthrough-return
775  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
776  %v = load <8 x i8>, <8 x i8>* %s
777  ret <8 x i8> %v
778}
779
780define <8 x i16> @load_v8i16_from_numeric_address() {
781; CHECK-LABEL: load_v8i16_from_numeric_address:
782; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
783; CHECK-NEXT:  # %bb.0:
784; CHECK-NEXT:    i32.const 0
785; CHECK-NEXT:    v128.load 32
786; CHECK-NEXT:    # fallthrough-return
787  %s = inttoptr i32 32 to <8 x i16>*
788  %v = load <8 x i16>, <8 x i16>* %s
789  ret <8 x i16> %v
790}
791
792define <8 x i16> @load_splat_v8i16_from_numeric_address() {
793; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
794; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
795; CHECK-NEXT:  # %bb.0:
796; CHECK-NEXT:    i32.const 0
797; CHECK-NEXT:    v16x8.load_splat 32
798; CHECK-NEXT:    # fallthrough-return
799  %s = inttoptr i32 32 to i16*
800  %e = load i16, i16* %s
801  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
802  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
803  ret <8 x i16> %v2
804}
805
806define <8 x i16> @load_sext_v8i16_from_numeric_address() {
807; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
808; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
809; CHECK-NEXT:  # %bb.0:
810; CHECK-NEXT:    i32.const 0
811; CHECK-NEXT:    i16x8.load8x8_s 32
812; CHECK-NEXT:    # fallthrough-return
813  %s = inttoptr i32 32 to <8 x i8>*
814  %v = load <8 x i8>, <8 x i8>* %s
815  %v2 = sext <8 x i8> %v to <8 x i16>
816  ret <8 x i16> %v2
817}
818
819define <8 x i16> @load_zext_v8i16_from_numeric_address() {
820; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
821; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
822; CHECK-NEXT:  # %bb.0:
823; CHECK-NEXT:    i32.const 0
824; CHECK-NEXT:    i16x8.load8x8_u 32
825; CHECK-NEXT:    # fallthrough-return
826  %s = inttoptr i32 32 to <8 x i8>*
827  %v = load <8 x i8>, <8 x i8>* %s
828  %v2 = zext <8 x i8> %v to <8 x i16>
829  ret <8 x i16> %v2
830}
831
; A plain <8 x i8> load (no explicit extension) is still implemented with the
; zero-extending SIMD load, per the CHECK output below.
define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
843
@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
; A global's address is folded into the v128.load offset as a symbolic operand.
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %v
}
855
@gv_i16 = global i16 42
; Splat of a scalar loaded from a global selects v16x8.load_splat with the
; global's address as the symbolic offset.
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v16x8.load_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
869
@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
; Sign-extending SIMD load with a global's address as the symbolic offset.
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
882
; Zero-extending SIMD load with a global's address as the symbolic offset.
define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
894
; Plain <8 x i8> load from a global is implemented with the zero-extending
; SIMD load (see CHECK output).
define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %v
}
905
906
; Basic v128.store of an <8 x i16> through a pointer, offset 0.
define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* %p
  ret void
}
918
; An `add nuw` constant offset is folded into the v128.store offset immediate
; (nuw guarantees no wraparound, so folding is safe).
define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}
933
; An inbounds GEP with a positive index is folded into the store offset.
define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}
946
; A negative GEP index cannot be folded (wasm offsets are unsigned), so an
; explicit i32.add is emitted instead.
define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}
961
; An `add nsw` offset must NOT be folded into the store offset immediate:
; nsw does not rule out unsigned wraparound, so an explicit i32.add is kept.
; (Previously this test duplicated the negative-gep test above and did not
; exercise the unfolded-add pattern its name describes; the body now mirrors
; the corresponding load_*_with_unfolded_offset tests.)
define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK:         .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}
976
; A GEP without `inbounds` cannot be folded (the addition may wrap), so an
; explicit i32.add is emitted.
define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}
991
; A constant destination address is folded into the v128.store offset.
define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK:         .functype store_v8i16_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}
1004
; A global destination address becomes a symbolic v128.store offset.
define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK:         .functype store_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* @gv_v8i16
  ret void
}
1016
1017; ==============================================================================
1018; 4 x i32
1019; ==============================================================================
; Basic v128.load of a <4 x i32> through a pointer, offset 0.
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32:
; CHECK:         .functype load_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %v
}
1030
; Scalar i32 load + splat shuffle selects v32x4.load_splat.
define <4 x i32> @load_splat_v4i32(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32:
; CHECK:         .functype load_splat_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i32, i32* %addr, align 4
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}
1043
; <4 x i16> load + sext selects the sign-extending i32x4.load16x4_s.
define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32:
; CHECK:         .functype load_sext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1055
; <4 x i16> load + zext selects the zero-extending i32x4.load16x4_u.
define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32:
; CHECK:         .functype load_zext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1067
; Plain <4 x i16> load (no explicit extension) is implemented with the
; zero-extending SIMD load (see CHECK output).
define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32:
; CHECK:         .functype load_ext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  ret <4 x i16> %v
}
1078
; `add nuw` constant offset is folded into the v128.load offset immediate.
define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_offset:
; CHECK:         .functype load_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}
1092
; `add nuw` constant offset folds into the v32x4.load_splat offset immediate.
define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
; CHECK:         .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}
1108
; `add nuw` constant offset folds into the sign-extending load's offset.
define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
; CHECK:         .functype load_sext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1123
; `add nuw` constant offset folds into the zero-extending load's offset.
define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
; CHECK:         .functype load_zext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1138
; Plain <4 x i16> load with folded `add nuw` offset still selects the
; zero-extending SIMD load.
define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
; CHECK:         .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1152
; Inbounds GEP with a positive index folds into the load offset immediate.
define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}
1164
; Inbounds GEP offset (4 bytes, one i32) folds into the load_splat offset.
define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}
1178
; Inbounds GEP offset (8 bytes, one <4 x i16>) folds into the sext load.
define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1191
; Inbounds GEP offset (8 bytes) folds into the zext load.
define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1204
; Plain <4 x i16> load with folded GEP offset selects the zero-extending load.
define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1216
; Negative GEP index cannot fold (wasm offsets are unsigned) -> explicit add.
define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}
1230
; Negative GEP index (-4 bytes) cannot fold into load_splat -> explicit add.
define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}
1246
; Negative GEP index (-8 bytes) cannot fold into the sext load -> explicit add.
define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1261
; Negative GEP index (-8 bytes) cannot fold into the zext load -> explicit add.
define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1276
; Plain <4 x i16> load with a negative GEP index: explicit add + extending load.
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1290
; `add nsw` (no nuw) may wrap unsigned, so the offset is NOT folded:
; an explicit i32.add is emitted instead of a load offset immediate.
define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; CHECK:         .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}
1306
; `add nsw` offset is not folded into load_splat -> explicit i32.add.
define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}
1324
; `add nsw` offset is not folded into the sext load -> explicit i32.add.
define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1341
; `add nsw` offset is not folded into the zext load -> explicit i32.add.
define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1358
; Plain <4 x i16> load with `add nsw` offset: explicit add + extending load.
define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1374
; GEP without `inbounds` may wrap, so its offset is not folded -> explicit add.
define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}
1388
; Non-inbounds GEP offset is not folded into load_splat -> explicit add.
define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}
1404
; Non-inbounds GEP offset is not folded into the sext load -> explicit add.
define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1419
; Non-inbounds GEP offset is not folded into the zext load -> explicit add.
define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1434
; Plain <4 x i16> load through a non-inbounds GEP: explicit add + extending load.
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1448
; A constant address folds into the v128.load offset immediate.
define <4 x i32> @load_v4i32_from_numeric_address() {
; CHECK-LABEL: load_v4i32_from_numeric_address:
; CHECK:         .functype load_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}
1460
; A constant address folds into the v32x4.load_splat offset immediate.
define <4 x i32> @load_splat_v4i32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
; CHECK:         .functype load_splat_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v32x4.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}
1474
; A constant address folds into the sign-extending load's offset immediate.
define <4 x i32> @load_sext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
; CHECK:         .functype load_sext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1487
; A constant address folds into the zero-extending load's offset immediate.
define <4 x i32> @load_zext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
; CHECK:         .functype load_zext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1500
; Plain <4 x i16> load from a constant address uses the zero-extending load.
define <4 x i16> @load_ext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
; CHECK:         .functype load_ext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1512
@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
; A global's address becomes a symbolic v128.load offset.
define <4 x i32> @load_v4i32_from_global_address() {
; CHECK-LABEL: load_v4i32_from_global_address:
; CHECK:         .functype load_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* @gv_v4i32
  ret <4 x i32> %v
}
1524
@gv_i32 = global i32 42
; Splat of a scalar loaded from a global: v32x4.load_splat with symbolic offset.
define <4 x i32> @load_splat_v4i32_from_global_address() {
; CHECK-LABEL: load_splat_v4i32_from_global_address:
; CHECK:         .functype load_splat_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v32x4.load_splat gv_i32
; CHECK-NEXT:    # fallthrough-return
  %e = load i32, i32* @gv_i32
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}
1538
@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
; Sign-extending load with a global's address as the symbolic offset.
define <4 x i32> @load_sext_v4i32_from_global_address() {
; CHECK-LABEL: load_sext_v4i32_from_global_address:
; CHECK:         .functype load_sext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_s gv_v4i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1551
; Zero-extending load with a global's address as the symbolic offset.
define <4 x i32> @load_zext_v4i32_from_global_address() {
; CHECK-LABEL: load_zext_v4i32_from_global_address:
; CHECK:         .functype load_zext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1563
; Plain <4 x i16> load from a global uses the zero-extending SIMD load.
define <4 x i16> @load_ext_v4i32_from_global_address() {
; CHECK-LABEL: load_ext_v4i32_from_global_address:
; CHECK:         .functype load_ext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  ret <4 x i16> %v
}
1574
; Basic v128.store of a <4 x i32> through a pointer, offset 0.
define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32:
; CHECK:         .functype store_v4i32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x i32> %v , <4 x i32>* %p
  ret void
}
1586
; `add nuw` constant offset is folded into the v128.store offset immediate.
define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_folded_offset:
; CHECK:         .functype store_v4i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1601
; Inbounds GEP with a positive index folds into the store offset immediate.
define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
; CHECK:         .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1614
; Negative GEP index cannot fold (wasm offsets are unsigned) -> explicit add.
define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1629
; An `add nsw` offset must NOT be folded into the store offset immediate:
; nsw does not rule out unsigned wraparound, so an explicit i32.add is kept.
; (Previously this test duplicated the negative-gep test above and did not
; exercise the unfolded-add pattern its name describes; the body now mirrors
; load_v4i32_with_unfolded_offset.)
define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_offset:
; CHECK:         .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1644
; Non-inbounds GEP offset is not folded into the store -> explicit add.
define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1659
1660define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
1661; CHECK-LABEL: store_v4i32_to_numeric_address:
1662; CHECK:         .functype store_v4i32_to_numeric_address (v128) -> ()
1663; CHECK-NEXT:  # %bb.0:
1664; CHECK-NEXT:    i32.const 0
1665; CHECK-NEXT:    local.get 0
1666; CHECK-NEXT:    v128.store 32
1667; CHECK-NEXT:    # fallthrough-return
1668  %s = inttoptr i32 32 to <4 x i32>*
1669  store <4 x i32> %v , <4 x i32>* %s
1670  ret void
1671}
1672
1673define void @store_v4i32_to_global_address(<4 x i32> %v) {
1674; CHECK-LABEL: store_v4i32_to_global_address:
1675; CHECK:         .functype store_v4i32_to_global_address (v128) -> ()
1676; CHECK-NEXT:  # %bb.0:
1677; CHECK-NEXT:    i32.const 0
1678; CHECK-NEXT:    local.get 0
1679; CHECK-NEXT:    v128.store gv_v4i32
1680; CHECK-NEXT:    # fallthrough-return
1681  store <4 x i32> %v , <4 x i32>* @gv_v4i32
1682  ret void
1683}
1684
1685; ==============================================================================
1686; 2 x i64
1687; ==============================================================================
; Basic <2 x i64> loads: plain v128.load, splat via v64x2.load_splat, and the
; 32->64 extending loads (load32x2_s / load32x2_u).
define <2 x i64> @load_v2i64(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64:
; CHECK:         .functype load_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i64>, <2 x i64>* %p
  ret <2 x i64> %v
}

; A scalar load broadcast to all lanes selects v64x2.load_splat.
define <2 x i64> @load_splat_v2i64(i64* %p) {
; CHECK-LABEL: load_splat_v2i64:
; CHECK:         .functype load_splat_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i64, i64* %p
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

; load + sext is combined into the signed extending load.
define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64:
; CHECK:         .functype load_sext_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* %p
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

; load + zext is combined into the unsigned extending load.
define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64:
; CHECK:         .functype load_zext_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* %p
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

; A bare <2 x i32> load (no explicit extension in the IR) is widened and still
; selects the extending load.
define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64:
; CHECK:         .functype load_ext_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* %p
  ret <2 x i32> %v
}
1746
; An offset added with `nuw` cannot wrap the unsigned address space, so it is
; folded into each load's offset immediate.
define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_offset:
; CHECK:         .functype load_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
; CHECK:         .functype load_splat_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
; CHECK:         .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
; CHECK:         .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
; CHECK:         .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
1820
; A positive offset reached via an inbounds GEP is folded into each load's
; offset immediate (note the offset scales with the pointee size: 16 for
; <2 x i64>, 8 for i64 / <2 x i32>).
define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
1884
; Wasm memory offsets are unsigned, so a negative GEP offset is never folded;
; each load keeps offset 0 and computes the address with an explicit i32.add.
define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 -1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
1958
; An offset added with `nsw` (but not `nuw`) may wrap the unsigned address
; space, so it is not folded; each load keeps offset 0 with an explicit add.
define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_offset:
; CHECK:         .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2042
; A GEP without `inbounds` may wrap, so its offset is not folded into the
; load's offset immediate.
define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i64, i64* %p, i32 1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2116
; Loads from a constant address use i32.const 0 as the base address with the
; numeric address folded into the offset immediate.
define <2 x i64> @load_v2i64_from_numeric_address() {
; CHECK-LABEL: load_v2i64_from_numeric_address:
; CHECK:         .functype load_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

define <2 x i64> @load_splat_v2i64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
; CHECK:         .functype load_splat_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

define <2 x i64> @load_sext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
; CHECK:         .functype load_sext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
; CHECK:         .functype load_zext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
; CHECK:         .functype load_ext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2180
; Loads from a global fold the symbol itself into the offset immediate over an
; i32.const 0 base.
@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
define <2 x i64> @load_v2i64_from_global_address() {
; CHECK-LABEL: load_v2i64_from_global_address:
; CHECK:         .functype load_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i64>, <2 x i64>* @gv_v2i64
  ret <2 x i64> %v
}

@gv_i64 = global i64 42
define <2 x i64> @load_splat_v2i64_from_global_address() {
; CHECK-LABEL: load_splat_v2i64_from_global_address:
; CHECK:         .functype load_splat_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat gv_i64
; CHECK-NEXT:    # fallthrough-return
  %e = load i64, i64* @gv_i64
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
define <2 x i64> @load_sext_v2i64_from_global_address() {
; CHECK-LABEL: load_sext_v2i64_from_global_address:
; CHECK:         .functype load_sext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_s gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i64> @load_zext_v2i64_from_global_address() {
; CHECK-LABEL: load_zext_v2i64_from_global_address:
; CHECK:         .functype load_zext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

define <2 x i32> @load_ext_v2i64_from_global_address() {
; CHECK-LABEL: load_ext_v2i64_from_global_address:
; CHECK:         .functype load_ext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  ret <2 x i32> %v
}
2242
; <2 x i64> stores: plain store, offset folded via `nuw` add, offset folded
; via inbounds GEP, and a negative GEP offset that must stay unfolded.
define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64:
; CHECK:         .functype store_v2i64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* %p
  ret void
}

define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_offset:
; CHECK:         .functype store_v2i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; CHECK:         .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; Negative offsets are never folded (wasm offsets are unsigned).
define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
2297
; An offset added with `nsw` (but not `nuw`) may wrap the unsigned address
; space, so it cannot be folded into the store's offset immediate and an
; explicit i32.add is emitted instead. (Previously this test was a duplicate
; of store_v2i64_with_unfolded_gep_negative_offset and did not exercise the
; nsw-add pattern its name describes; body and CHECK lines now mirror
; load_v2i64_with_unfolded_offset.)
define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; CHECK:         .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
2312
; A GEP without `inbounds` may wrap, so its offset is not folded into the
; store's offset immediate.
define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; A store to a constant address folds the address into the offset immediate
; over an i32.const 0 base.
define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_numeric_address:
; CHECK:         .functype store_v2i64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; A store to a global folds the symbol itself into the offset immediate.
define void @store_v2i64_to_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_global_address:
; CHECK:         .functype store_v2i64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* @gv_v2i64
  ret void
}
2352
2353; ==============================================================================
2354; 4 x float
2355; ==============================================================================
; Basic <4 x float> loads (plain v128.load and v32x4.load_splat), plus the
; folded-offset cases: `nuw` add and inbounds GEP both fold into the load's
; offset immediate.
define <4 x float> @load_v4f32(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32:
; CHECK:         .functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* %p
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32(float* %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK:         .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* %p
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK:         .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2435
; Unfolded <4 x float> load offsets: negative GEP offsets (wasm offsets are
; unsigned) and `nsw`-only adds both require an explicit i32.add with offset 0.
define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 -1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK:         .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2499
2500define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
2501; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
2502; CHECK:         .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
2503; CHECK-NEXT:  # %bb.0:
2504; CHECK-NEXT:    local.get 0
2505; CHECK-NEXT:    i32.const 16
2506; CHECK-NEXT:    i32.add
2507; CHECK-NEXT:    v128.load 0
2508; CHECK-NEXT:    # fallthrough-return
2509  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
2510  %v = load <4 x float>, <4 x float>* %s
2511  ret <4 x float> %v
2512}
2513
2514define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
2515; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
2516; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
2517; CHECK-NEXT:  # %bb.0:
2518; CHECK-NEXT:    local.get 0
2519; CHECK-NEXT:    i32.const 4
2520; CHECK-NEXT:    i32.add
2521; CHECK-NEXT:    v32x4.load_splat 0
2522; CHECK-NEXT:    # fallthrough-return
2523  %s = getelementptr float, float* %p, i32 1
2524  %e = load float, float* %s
2525  %v1 = insertelement <4 x float> undef, float %e, i32 0
2526  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2527  ret <4 x float> %v2
2528}

; A load from a constant address is folded as an offset from base 0.
define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK:         .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK:         .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v32x4.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; A global's address folds as a symbolic offset immediate from base 0.
@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK:         .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %v
}

@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK:         .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v32x4.load_splat gv_f32
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* @gv_f32
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; Stores mirror the load tests: the address operand is pushed first,
; then the value to store.
define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK:         .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* %p
  ret void
}

; An `add nuw` cannot wrap unsigned, so the +16 folds into the store's
; offset immediate.
define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK:         .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; An inbounds gep with a positive constant index also folds.
define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; Negative offsets are not folded (offset immediates are unsigned).
define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; Check that offsets from an `add nsw` are not folded into the store:
; nsw does not rule out unsigned wraparound. The previous body was a
; copy-paste of the `_unfolded_gep_negative_offset` test and did not
; exercise this pattern (compare load_v4f32_with_unfolded_offset).
define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; A gep without `inbounds` may wrap, so its offset is not folded.
define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; A store to a constant address folds as an offset from base 0.
define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; A global's address folds as a symbolic offset immediate from base 0.
define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* @gv_v4f32
  ret void
}

; ==============================================================================
; 2 x double
; ==============================================================================
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK:         .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* %p
  ret <2 x double> %v
}

; A scalar load splatted to all lanes selects v64x2.load_splat.
define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* %p
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; An `add nuw` cannot wrap unsigned, so the +16 folds into the load's
; offset immediate.
define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; An inbounds gep with a positive constant index also folds
; (16 bytes = one <2 x double>; 8 bytes for the scalar splat below).
define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; Negative offsets cannot be folded (offset immediates are unsigned).
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 -1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; An `add nsw` does not rule out unsigned wraparound, so the offset is
; not folded; expect an explicit i32.add.
define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; A gep without `inbounds` may wrap, so its offset is not folded.
define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; A load from a constant address is folded as an offset from base 0.
define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; A global's address folds as a symbolic offset immediate from base 0.
@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %v
}

@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat gv_f64
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; Stores mirror the load tests: the address operand is pushed first,
; then the value to store.
define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK:         .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* %p
  ret void
}

; An `add nuw` cannot wrap unsigned, so the +16 folds into the store's
; offset immediate.
define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

; An inbounds gep with a positive constant index also folds.
define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

; Negative offsets are not folded (offset immediates are unsigned).
define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

; Check that offsets from an `add nsw` are not folded into the store:
; nsw does not rule out unsigned wraparound. The previous body was a
; copy-paste of the `_unfolded_gep_negative_offset` test and did not
; exercise this pattern (compare load_v2f64_with_unfolded_offset).
define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

; A gep without `inbounds` may wrap, so its offset is not folded.
define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}

; A store to a constant address folds as an offset from base 0.
define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}

; A global's address folds as a symbolic offset immediate from base 0.
define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* @gv_v2f64
  ret void
}
