1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
3
4; Test SIMD loads and stores
5
6target triple = "wasm32-unknown-unknown"
7
8; ==============================================================================
9; 16 x i8
10; ==============================================================================
11define <16 x i8> @load_v16i8(<16 x i8>* %p) {
12; CHECK-LABEL: load_v16i8:
13; CHECK:         .functype load_v16i8 (i32) -> (v128)
14; CHECK-NEXT:  # %bb.0:
15; CHECK-NEXT:    local.get 0
16; CHECK-NEXT:    v128.load 0
17; CHECK-NEXT:    # fallthrough-return
18  %v = load <16 x i8>, <16 x i8>* %p
19  ret <16 x i8> %v
20}
21
22define <16 x i8> @load_splat_v16i8(i8* %p) {
23; CHECK-LABEL: load_splat_v16i8:
24; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
25; CHECK-NEXT:  # %bb.0:
26; CHECK-NEXT:    local.get 0
27; CHECK-NEXT:    v128.load8_splat 0
28; CHECK-NEXT:    # fallthrough-return
29  %e = load i8, i8* %p
30  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
31  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
32  ret <16 x i8> %v2
33}
34
35define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
36; CHECK-LABEL: load_v16i8_with_folded_offset:
37; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
38; CHECK-NEXT:  # %bb.0:
39; CHECK-NEXT:    local.get 0
40; CHECK-NEXT:    v128.load 16
41; CHECK-NEXT:    # fallthrough-return
42  %q = ptrtoint <16 x i8>* %p to i32
43  %r = add nuw i32 %q, 16
44  %s = inttoptr i32 %r to <16 x i8>*
45  %v = load <16 x i8>, <16 x i8>* %s
46  ret <16 x i8> %v
47}
48
49define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
50; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
51; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
52; CHECK-NEXT:  # %bb.0:
53; CHECK-NEXT:    local.get 0
54; CHECK-NEXT:    v128.load8_splat 16
55; CHECK-NEXT:    # fallthrough-return
56  %q = ptrtoint i8* %p to i32
57  %r = add nuw i32 %q, 16
58  %s = inttoptr i32 %r to i8*
59  %e = load i8, i8* %s
60  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
61  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
62  ret <16 x i8> %v2
63}
64
65define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
66; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
67; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
68; CHECK-NEXT:  # %bb.0:
69; CHECK-NEXT:    local.get 0
70; CHECK-NEXT:    v128.load 16
71; CHECK-NEXT:    # fallthrough-return
72  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
73  %v = load <16 x i8>, <16 x i8>* %s
74  ret <16 x i8> %v
75}
76
77define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
78; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
79; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
80; CHECK-NEXT:  # %bb.0:
81; CHECK-NEXT:    local.get 0
82; CHECK-NEXT:    v128.load8_splat 1
83; CHECK-NEXT:    # fallthrough-return
84  %s = getelementptr inbounds i8, i8* %p, i32 1
85  %e = load i8, i8* %s
86  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
87  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
88  ret <16 x i8> %v2
89}
90
91define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
92; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
93; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
94; CHECK-NEXT:  # %bb.0:
95; CHECK-NEXT:    local.get 0
96; CHECK-NEXT:    i32.const -16
97; CHECK-NEXT:    i32.add
98; CHECK-NEXT:    v128.load 0
99; CHECK-NEXT:    # fallthrough-return
100  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
101  %v = load <16 x i8>, <16 x i8>* %s
102  ret <16 x i8> %v
103}
104
105define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
106; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
107; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
108; CHECK-NEXT:  # %bb.0:
109; CHECK-NEXT:    local.get 0
110; CHECK-NEXT:    i32.const -1
111; CHECK-NEXT:    i32.add
112; CHECK-NEXT:    v128.load8_splat 0
113; CHECK-NEXT:    # fallthrough-return
114  %s = getelementptr inbounds i8, i8* %p, i32 -1
115  %e = load i8, i8* %s
116  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
117  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
118  ret <16 x i8> %v2
119}
120
121define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
122; CHECK-LABEL: load_v16i8_with_unfolded_offset:
123; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
124; CHECK-NEXT:  # %bb.0:
125; CHECK-NEXT:    local.get 0
126; CHECK-NEXT:    i32.const 16
127; CHECK-NEXT:    i32.add
128; CHECK-NEXT:    v128.load 0
129; CHECK-NEXT:    # fallthrough-return
130  %q = ptrtoint <16 x i8>* %p to i32
131  %r = add nsw i32 %q, 16
132  %s = inttoptr i32 %r to <16 x i8>*
133  %v = load <16 x i8>, <16 x i8>* %s
134  ret <16 x i8> %v
135}
136
137define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
138; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
139; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
140; CHECK-NEXT:  # %bb.0:
141; CHECK-NEXT:    local.get 0
142; CHECK-NEXT:    i32.const 16
143; CHECK-NEXT:    i32.add
144; CHECK-NEXT:    v128.load8_splat 0
145; CHECK-NEXT:    # fallthrough-return
146  %q = ptrtoint i8* %p to i32
147  %r = add nsw i32 %q, 16
148  %s = inttoptr i32 %r to i8*
149  %e = load i8, i8* %s
150  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
151  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
152  ret <16 x i8> %v2
153}
154
155define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
156; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
157; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
158; CHECK-NEXT:  # %bb.0:
159; CHECK-NEXT:    local.get 0
160; CHECK-NEXT:    i32.const 16
161; CHECK-NEXT:    i32.add
162; CHECK-NEXT:    v128.load 0
163; CHECK-NEXT:    # fallthrough-return
164  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
165  %v = load <16 x i8>, <16 x i8>* %s
166  ret <16 x i8> %v
167}
168
169define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
170; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
171; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
172; CHECK-NEXT:  # %bb.0:
173; CHECK-NEXT:    local.get 0
174; CHECK-NEXT:    i32.const 1
175; CHECK-NEXT:    i32.add
176; CHECK-NEXT:    v128.load8_splat 0
177; CHECK-NEXT:    # fallthrough-return
178  %s = getelementptr i8, i8* %p, i32 1
179  %e = load i8, i8* %s
180  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
181  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
182  ret <16 x i8> %v2
183}
184
185define <16 x i8> @load_v16i8_from_numeric_address() {
186; CHECK-LABEL: load_v16i8_from_numeric_address:
187; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
188; CHECK-NEXT:  # %bb.0:
189; CHECK-NEXT:    i32.const 0
190; CHECK-NEXT:    v128.load 32
191; CHECK-NEXT:    # fallthrough-return
192  %s = inttoptr i32 32 to <16 x i8>*
193  %v = load <16 x i8>, <16 x i8>* %s
194  ret <16 x i8> %v
195}
196
197define <16 x i8> @load_splat_v16i8_from_numeric_address() {
198; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
199; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
200; CHECK-NEXT:  # %bb.0:
201; CHECK-NEXT:    i32.const 0
202; CHECK-NEXT:    v128.load8_splat 32
203; CHECK-NEXT:    # fallthrough-return
204  %s = inttoptr i32 32 to i8*
205  %e = load i8, i8* %s
206  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
207  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
208  ret <16 x i8> %v2
209}
210
211@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
212define <16 x i8> @load_v16i8_from_global_address() {
213; CHECK-LABEL: load_v16i8_from_global_address:
214; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
215; CHECK-NEXT:  # %bb.0:
216; CHECK-NEXT:    i32.const 0
217; CHECK-NEXT:    v128.load gv_v16i8
218; CHECK-NEXT:    # fallthrough-return
219  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
220  ret <16 x i8> %v
221}
222
223@gv_i8 = global i8 42
224define <16 x i8> @load_splat_v16i8_from_global_address() {
225; CHECK-LABEL: load_splat_v16i8_from_global_address:
226; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
227; CHECK-NEXT:  # %bb.0:
228; CHECK-NEXT:    i32.const 0
229; CHECK-NEXT:    v128.load8_splat gv_i8
230; CHECK-NEXT:    # fallthrough-return
231  %e = load i8, i8* @gv_i8
232  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
233  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
234  ret <16 x i8> %v2
235}
236
237define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
238; CHECK-LABEL: store_v16i8:
239; CHECK:         .functype store_v16i8 (v128, i32) -> ()
240; CHECK-NEXT:  # %bb.0:
241; CHECK-NEXT:    local.get 1
242; CHECK-NEXT:    local.get 0
243; CHECK-NEXT:    v128.store 0
244; CHECK-NEXT:    # fallthrough-return
245  store <16 x i8> %v , <16 x i8>* %p
246  ret void
247}
248
249define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
250; CHECK-LABEL: store_v16i8_with_folded_offset:
251; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
252; CHECK-NEXT:  # %bb.0:
253; CHECK-NEXT:    local.get 1
254; CHECK-NEXT:    local.get 0
255; CHECK-NEXT:    v128.store 16
256; CHECK-NEXT:    # fallthrough-return
257  %q = ptrtoint <16 x i8>* %p to i32
258  %r = add nuw i32 %q, 16
259  %s = inttoptr i32 %r to <16 x i8>*
260  store <16 x i8> %v , <16 x i8>* %s
261  ret void
262}
263
264define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
265; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
266; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
267; CHECK-NEXT:  # %bb.0:
268; CHECK-NEXT:    local.get 1
269; CHECK-NEXT:    local.get 0
270; CHECK-NEXT:    v128.store 16
271; CHECK-NEXT:    # fallthrough-return
272  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
273  store <16 x i8> %v , <16 x i8>* %s
274  ret void
275}
276
277define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
278; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
279; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
280; CHECK-NEXT:  # %bb.0:
281; CHECK-NEXT:    local.get 1
282; CHECK-NEXT:    i32.const -16
283; CHECK-NEXT:    i32.add
284; CHECK-NEXT:    local.get 0
285; CHECK-NEXT:    v128.store 0
286; CHECK-NEXT:    # fallthrough-return
287  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
288  store <16 x i8> %v , <16 x i8>* %s
289  ret void
290}
291
292define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
293; CHECK-LABEL: store_v16i8_with_unfolded_offset:
294; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
295; CHECK-NEXT:  # %bb.0:
296; CHECK-NEXT:    local.get 1
297; CHECK-NEXT:    i32.const 16
298; CHECK-NEXT:    i32.add
299; CHECK-NEXT:    local.get 0
300; CHECK-NEXT:    v128.store 0
301; CHECK-NEXT:    # fallthrough-return
302  %q = ptrtoint <16 x i8>* %p to i32
303  %r = add nsw i32 %q, 16
304  %s = inttoptr i32 %r to <16 x i8>*
305  store <16 x i8> %v , <16 x i8>* %s
306  ret void
307}
308
309define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
310; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
311; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
312; CHECK-NEXT:  # %bb.0:
313; CHECK-NEXT:    local.get 1
314; CHECK-NEXT:    i32.const 16
315; CHECK-NEXT:    i32.add
316; CHECK-NEXT:    local.get 0
317; CHECK-NEXT:    v128.store 0
318; CHECK-NEXT:    # fallthrough-return
319  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
320  store <16 x i8> %v , <16 x i8>* %s
321  ret void
322}
323
324define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
325; CHECK-LABEL: store_v16i8_to_numeric_address:
326; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
327; CHECK-NEXT:  # %bb.0:
328; CHECK-NEXT:    i32.const 0
329; CHECK-NEXT:    local.get 0
330; CHECK-NEXT:    v128.store 32
331; CHECK-NEXT:    # fallthrough-return
332  %s = inttoptr i32 32 to <16 x i8>*
333  store <16 x i8> %v , <16 x i8>* %s
334  ret void
335}
336
337define void @store_v16i8_to_global_address(<16 x i8> %v) {
338; CHECK-LABEL: store_v16i8_to_global_address:
339; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
340; CHECK-NEXT:  # %bb.0:
341; CHECK-NEXT:    i32.const 0
342; CHECK-NEXT:    local.get 0
343; CHECK-NEXT:    v128.store gv_v16i8
344; CHECK-NEXT:    # fallthrough-return
345  store <16 x i8> %v , <16 x i8>* @gv_v16i8
346  ret void
347}
348
349; ==============================================================================
350; 8 x i16
351; ==============================================================================
352define <8 x i16> @load_v8i16(<8 x i16>* %p) {
353; CHECK-LABEL: load_v8i16:
354; CHECK:         .functype load_v8i16 (i32) -> (v128)
355; CHECK-NEXT:  # %bb.0:
356; CHECK-NEXT:    local.get 0
357; CHECK-NEXT:    v128.load 0
358; CHECK-NEXT:    # fallthrough-return
359  %v = load <8 x i16>, <8 x i16>* %p
360  ret <8 x i16> %v
361}
362
363define <8 x i16> @load_splat_v8i16(i16* %p) {
364; CHECK-LABEL: load_splat_v8i16:
365; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
366; CHECK-NEXT:  # %bb.0:
367; CHECK-NEXT:    local.get 0
368; CHECK-NEXT:    v128.load16_splat 0
369; CHECK-NEXT:    # fallthrough-return
370  %e = load i16, i16* %p
371  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
372  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
373  ret <8 x i16> %v2
374}
375
376define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
377; CHECK-LABEL: load_sext_v8i16:
378; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
379; CHECK-NEXT:  # %bb.0:
380; CHECK-NEXT:    local.get 0
381; CHECK-NEXT:    i16x8.load8x8_s 0
382; CHECK-NEXT:    # fallthrough-return
383  %v = load <8 x i8>, <8 x i8>* %p
384  %v2 = sext <8 x i8> %v to <8 x i16>
385  ret <8 x i16> %v2
386}
387
388define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
389; CHECK-LABEL: load_zext_v8i16:
390; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
391; CHECK-NEXT:  # %bb.0:
392; CHECK-NEXT:    local.get 0
393; CHECK-NEXT:    i16x8.load8x8_u 0
394; CHECK-NEXT:    # fallthrough-return
395  %v = load <8 x i8>, <8 x i8>* %p
396  %v2 = zext <8 x i8> %v to <8 x i16>
397  ret <8 x i16> %v2
398}
399
400define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
401; CHECK-LABEL: load_ext_v8i16:
402; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
403; CHECK-NEXT:  # %bb.0:
404; CHECK-NEXT:    local.get 0
405; CHECK-NEXT:    v128.load64_zero 0
406; CHECK-NEXT:    # fallthrough-return
407  %v = load <8 x i8>, <8 x i8>* %p
408  ret <8 x i8> %v
409}
410
411define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
412; CHECK-LABEL: load_v8i16_with_folded_offset:
413; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
414; CHECK-NEXT:  # %bb.0:
415; CHECK-NEXT:    local.get 0
416; CHECK-NEXT:    v128.load 16
417; CHECK-NEXT:    # fallthrough-return
418  %q = ptrtoint <8 x i16>* %p to i32
419  %r = add nuw i32 %q, 16
420  %s = inttoptr i32 %r to <8 x i16>*
421  %v = load <8 x i16>, <8 x i16>* %s
422  ret <8 x i16> %v
423}
424
425define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
426; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
427; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
428; CHECK-NEXT:  # %bb.0:
429; CHECK-NEXT:    local.get 0
430; CHECK-NEXT:    v128.load16_splat 16
431; CHECK-NEXT:    # fallthrough-return
432  %q = ptrtoint i16* %p to i32
433  %r = add nuw i32 %q, 16
434  %s = inttoptr i32 %r to i16*
435  %e = load i16, i16* %s
436  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
437  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
438  ret <8 x i16> %v2
439}
440
441define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
442; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
443; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
444; CHECK-NEXT:  # %bb.0:
445; CHECK-NEXT:    local.get 0
446; CHECK-NEXT:    i16x8.load8x8_s 16
447; CHECK-NEXT:    # fallthrough-return
448  %q = ptrtoint <8 x i8>* %p to i32
449  %r = add nuw i32 %q, 16
450  %s = inttoptr i32 %r to <8 x i8>*
451  %v = load <8 x i8>, <8 x i8>* %s
452  %v2 = sext <8 x i8> %v to <8 x i16>
453  ret <8 x i16> %v2
454}
455
456define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
457; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
458; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
459; CHECK-NEXT:  # %bb.0:
460; CHECK-NEXT:    local.get 0
461; CHECK-NEXT:    i16x8.load8x8_u 16
462; CHECK-NEXT:    # fallthrough-return
463  %q = ptrtoint <8 x i8>* %p to i32
464  %r = add nuw i32 %q, 16
465  %s = inttoptr i32 %r to <8 x i8>*
466  %v = load <8 x i8>, <8 x i8>* %s
467  %v2 = zext <8 x i8> %v to <8 x i16>
468  ret <8 x i16> %v2
469}
470
471define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
472; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
473; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
474; CHECK-NEXT:  # %bb.0:
475; CHECK-NEXT:    local.get 0
476; CHECK-NEXT:    v128.load64_zero 16
477; CHECK-NEXT:    # fallthrough-return
478  %q = ptrtoint <8 x i8>* %p to i32
479  %r = add nuw i32 %q, 16
480  %s = inttoptr i32 %r to <8 x i8>*
481  %v = load <8 x i8>, <8 x i8>* %s
482  ret <8 x i8> %v
483}
484
485define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
486; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
487; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
488; CHECK-NEXT:  # %bb.0:
489; CHECK-NEXT:    local.get 0
490; CHECK-NEXT:    v128.load 16
491; CHECK-NEXT:    # fallthrough-return
492  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
493  %v = load <8 x i16>, <8 x i16>* %s
494  ret <8 x i16> %v
495}
496
497define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
498; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
499; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
500; CHECK-NEXT:  # %bb.0:
501; CHECK-NEXT:    local.get 0
502; CHECK-NEXT:    v128.load16_splat 2
503; CHECK-NEXT:    # fallthrough-return
504  %s = getelementptr inbounds i16, i16* %p, i32 1
505  %e = load i16, i16* %s
506  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
507  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
508  ret <8 x i16> %v2
509}
510
511define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
512; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
513; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
514; CHECK-NEXT:  # %bb.0:
515; CHECK-NEXT:    local.get 0
516; CHECK-NEXT:    i16x8.load8x8_s 8
517; CHECK-NEXT:    # fallthrough-return
518  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
519  %v = load <8 x i8>, <8 x i8>* %s
520  %v2 = sext <8 x i8> %v to <8 x i16>
521  ret <8 x i16> %v2
522}
523
524define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
525; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
526; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
527; CHECK-NEXT:  # %bb.0:
528; CHECK-NEXT:    local.get 0
529; CHECK-NEXT:    i16x8.load8x8_u 8
530; CHECK-NEXT:    # fallthrough-return
531  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
532  %v = load <8 x i8>, <8 x i8>* %s
533  %v2 = zext <8 x i8> %v to <8 x i16>
534  ret <8 x i16> %v2
535}
536
537define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
538; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
539; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
540; CHECK-NEXT:  # %bb.0:
541; CHECK-NEXT:    local.get 0
542; CHECK-NEXT:    v128.load64_zero 8
543; CHECK-NEXT:    # fallthrough-return
544  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
545  %v = load <8 x i8>, <8 x i8>* %s
546  ret <8 x i8> %v
547}
548
549define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
550; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
551; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
552; CHECK-NEXT:  # %bb.0:
553; CHECK-NEXT:    local.get 0
554; CHECK-NEXT:    i32.const -16
555; CHECK-NEXT:    i32.add
556; CHECK-NEXT:    v128.load 0
557; CHECK-NEXT:    # fallthrough-return
558  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
559  %v = load <8 x i16>, <8 x i16>* %s
560  ret <8 x i16> %v
561}
562
563define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
564; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
565; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
566; CHECK-NEXT:  # %bb.0:
567; CHECK-NEXT:    local.get 0
568; CHECK-NEXT:    i32.const -2
569; CHECK-NEXT:    i32.add
570; CHECK-NEXT:    v128.load16_splat 0
571; CHECK-NEXT:    # fallthrough-return
572  %s = getelementptr inbounds i16, i16* %p, i32 -1
573  %e = load i16, i16* %s
574  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
575  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
576  ret <8 x i16> %v2
577}
578
579define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
580; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
581; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
582; CHECK-NEXT:  # %bb.0:
583; CHECK-NEXT:    local.get 0
584; CHECK-NEXT:    i32.const -8
585; CHECK-NEXT:    i32.add
586; CHECK-NEXT:    i16x8.load8x8_s 0
587; CHECK-NEXT:    # fallthrough-return
588  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
589  %v = load <8 x i8>, <8 x i8>* %s
590  %v2 = sext <8 x i8> %v to <8 x i16>
591  ret <8 x i16> %v2
592}
593
594define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
595; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
596; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
597; CHECK-NEXT:  # %bb.0:
598; CHECK-NEXT:    local.get 0
599; CHECK-NEXT:    i32.const -8
600; CHECK-NEXT:    i32.add
601; CHECK-NEXT:    i16x8.load8x8_u 0
602; CHECK-NEXT:    # fallthrough-return
603  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
604  %v = load <8 x i8>, <8 x i8>* %s
605  %v2 = zext <8 x i8> %v to <8 x i16>
606  ret <8 x i16> %v2
607}
608
609define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
610; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
611; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
612; CHECK-NEXT:  # %bb.0:
613; CHECK-NEXT:    local.get 0
614; CHECK-NEXT:    i32.const -8
615; CHECK-NEXT:    i32.add
616; CHECK-NEXT:    v128.load64_zero 0
617; CHECK-NEXT:    # fallthrough-return
618  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
619  %v = load <8 x i8>, <8 x i8>* %s
620  ret <8 x i8> %v
621}
622
623define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
624; CHECK-LABEL: load_v8i16_with_unfolded_offset:
625; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
626; CHECK-NEXT:  # %bb.0:
627; CHECK-NEXT:    local.get 0
628; CHECK-NEXT:    i32.const 16
629; CHECK-NEXT:    i32.add
630; CHECK-NEXT:    v128.load 0
631; CHECK-NEXT:    # fallthrough-return
632  %q = ptrtoint <8 x i16>* %p to i32
633  %r = add nsw i32 %q, 16
634  %s = inttoptr i32 %r to <8 x i16>*
635  %v = load <8 x i16>, <8 x i16>* %s
636  ret <8 x i16> %v
637}
638
639define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
640; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
641; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
642; CHECK-NEXT:  # %bb.0:
643; CHECK-NEXT:    local.get 0
644; CHECK-NEXT:    i32.const 16
645; CHECK-NEXT:    i32.add
646; CHECK-NEXT:    v128.load16_splat 0
647; CHECK-NEXT:    # fallthrough-return
648  %q = ptrtoint i16* %p to i32
649  %r = add nsw i32 %q, 16
650  %s = inttoptr i32 %r to i16*
651  %e = load i16, i16* %s
652  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
653  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
654  ret <8 x i16> %v2
655}
656
657define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
658; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
659; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
660; CHECK-NEXT:  # %bb.0:
661; CHECK-NEXT:    local.get 0
662; CHECK-NEXT:    i32.const 16
663; CHECK-NEXT:    i32.add
664; CHECK-NEXT:    i16x8.load8x8_s 0
665; CHECK-NEXT:    # fallthrough-return
666  %q = ptrtoint <8 x i8>* %p to i32
667  %r = add nsw i32 %q, 16
668  %s = inttoptr i32 %r to <8 x i8>*
669  %v = load <8 x i8>, <8 x i8>* %s
670  %v2 = sext <8 x i8> %v to <8 x i16>
671  ret <8 x i16> %v2
672}
673
674define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
675; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
676; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
677; CHECK-NEXT:  # %bb.0:
678; CHECK-NEXT:    local.get 0
679; CHECK-NEXT:    i32.const 16
680; CHECK-NEXT:    i32.add
681; CHECK-NEXT:    i16x8.load8x8_u 0
682; CHECK-NEXT:    # fallthrough-return
683  %q = ptrtoint <8 x i8>* %p to i32
684  %r = add nsw i32 %q, 16
685  %s = inttoptr i32 %r to <8 x i8>*
686  %v = load <8 x i8>, <8 x i8>* %s
687  %v2 = zext <8 x i8> %v to <8 x i16>
688  ret <8 x i16> %v2
689}
690
691define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
692; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
693; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
694; CHECK-NEXT:  # %bb.0:
695; CHECK-NEXT:    local.get 0
696; CHECK-NEXT:    i32.const 16
697; CHECK-NEXT:    i32.add
698; CHECK-NEXT:    v128.load64_zero 0
699; CHECK-NEXT:    # fallthrough-return
700  %q = ptrtoint <8 x i8>* %p to i32
701  %r = add nsw i32 %q, 16
702  %s = inttoptr i32 %r to <8 x i8>*
703  %v = load <8 x i8>, <8 x i8>* %s
704  ret <8 x i8> %v
705}
706
707define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
708; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
709; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
710; CHECK-NEXT:  # %bb.0:
711; CHECK-NEXT:    local.get 0
712; CHECK-NEXT:    i32.const 16
713; CHECK-NEXT:    i32.add
714; CHECK-NEXT:    v128.load 0
715; CHECK-NEXT:    # fallthrough-return
716  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
717  %v = load <8 x i16>, <8 x i16>* %s
718  ret <8 x i16> %v
719}
720
721define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
722; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
723; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
724; CHECK-NEXT:  # %bb.0:
725; CHECK-NEXT:    local.get 0
726; CHECK-NEXT:    i32.const 2
727; CHECK-NEXT:    i32.add
728; CHECK-NEXT:    v128.load16_splat 0
729; CHECK-NEXT:    # fallthrough-return
730  %s = getelementptr i16, i16* %p, i32 1
731  %e = load i16, i16* %s
732  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
733  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
734  ret <8 x i16> %v2
735}
736
737define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
738; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
739; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
740; CHECK-NEXT:  # %bb.0:
741; CHECK-NEXT:    local.get 0
742; CHECK-NEXT:    i32.const 8
743; CHECK-NEXT:    i32.add
744; CHECK-NEXT:    i16x8.load8x8_s 0
745; CHECK-NEXT:    # fallthrough-return
746  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
747  %v = load <8 x i8>, <8 x i8>* %s
748  %v2 = sext <8 x i8> %v to <8 x i16>
749  ret <8 x i16> %v2
750}
751
752define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
753; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
754; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
755; CHECK-NEXT:  # %bb.0:
756; CHECK-NEXT:    local.get 0
757; CHECK-NEXT:    i32.const 8
758; CHECK-NEXT:    i32.add
759; CHECK-NEXT:    i16x8.load8x8_u 0
760; CHECK-NEXT:    # fallthrough-return
761  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
762  %v = load <8 x i8>, <8 x i8>* %s
763  %v2 = zext <8 x i8> %v to <8 x i16>
764  ret <8 x i16> %v2
765}
766
767define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
768; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
769; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
770; CHECK-NEXT:  # %bb.0:
771; CHECK-NEXT:    local.get 0
772; CHECK-NEXT:    i32.const 8
773; CHECK-NEXT:    i32.add
774; CHECK-NEXT:    v128.load64_zero 0
775; CHECK-NEXT:    # fallthrough-return
776  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
777  %v = load <8 x i8>, <8 x i8>* %s
778  ret <8 x i8> %v
779}
780
781define <8 x i16> @load_v8i16_from_numeric_address() {
782; CHECK-LABEL: load_v8i16_from_numeric_address:
783; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
784; CHECK-NEXT:  # %bb.0:
785; CHECK-NEXT:    i32.const 0
786; CHECK-NEXT:    v128.load 32
787; CHECK-NEXT:    # fallthrough-return
788  %s = inttoptr i32 32 to <8 x i16>*
789  %v = load <8 x i16>, <8 x i16>* %s
790  ret <8 x i16> %v
791}
792
793define <8 x i16> @load_splat_v8i16_from_numeric_address() {
794; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
795; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
796; CHECK-NEXT:  # %bb.0:
797; CHECK-NEXT:    i32.const 0
798; CHECK-NEXT:    v128.load16_splat 32
799; CHECK-NEXT:    # fallthrough-return
800  %s = inttoptr i32 32 to i16*
801  %e = load i16, i16* %s
802  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
803  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
804  ret <8 x i16> %v2
805}
806
807define <8 x i16> @load_sext_v8i16_from_numeric_address() {
808; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
809; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
810; CHECK-NEXT:  # %bb.0:
811; CHECK-NEXT:    i32.const 0
812; CHECK-NEXT:    i16x8.load8x8_s 32
813; CHECK-NEXT:    # fallthrough-return
814  %s = inttoptr i32 32 to <8 x i8>*
815  %v = load <8 x i8>, <8 x i8>* %s
816  %v2 = sext <8 x i8> %v to <8 x i16>
817  ret <8 x i16> %v2
818}
819
820define <8 x i16> @load_zext_v8i16_from_numeric_address() {
821; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
822; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
823; CHECK-NEXT:  # %bb.0:
824; CHECK-NEXT:    i32.const 0
825; CHECK-NEXT:    i16x8.load8x8_u 32
826; CHECK-NEXT:    # fallthrough-return
827  %s = inttoptr i32 32 to <8 x i8>*
828  %v = load <8 x i8>, <8 x i8>* %s
829  %v2 = zext <8 x i8> %v to <8 x i16>
830  ret <8 x i16> %v2
831}
832
833define <8 x i8> @load_ext_v8i16_from_numeric_address() {
834; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
835; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
836; CHECK-NEXT:  # %bb.0:
837; CHECK-NEXT:    i32.const 0
838; CHECK-NEXT:    v128.load64_zero 32
839; CHECK-NEXT:    # fallthrough-return
840  %s = inttoptr i32 32 to <8 x i8>*
841  %v = load <8 x i8>, <8 x i8>* %s
842  ret <8 x i8> %v
843}
844
845@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
846define <8 x i16> @load_v8i16_from_global_address() {
847; CHECK-LABEL: load_v8i16_from_global_address:
848; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
849; CHECK-NEXT:  # %bb.0:
850; CHECK-NEXT:    i32.const 0
851; CHECK-NEXT:    v128.load gv_v8i16
852; CHECK-NEXT:    # fallthrough-return
853  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
854  ret <8 x i16> %v
855}
856
857@gv_i16 = global i16 42
858define <8 x i16> @load_splat_v8i16_from_global_address() {
859; CHECK-LABEL: load_splat_v8i16_from_global_address:
860; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
861; CHECK-NEXT:  # %bb.0:
862; CHECK-NEXT:    i32.const 0
863; CHECK-NEXT:    v128.load16_splat gv_i16
864; CHECK-NEXT:    # fallthrough-return
865  %e = load i16, i16* @gv_i16
866  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
867  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
868  ret <8 x i16> %v2
869}
870
871@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
872define <8 x i16> @load_sext_v8i16_from_global_address() {
873; CHECK-LABEL: load_sext_v8i16_from_global_address:
874; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
875; CHECK-NEXT:  # %bb.0:
876; CHECK-NEXT:    i32.const 0
877; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
878; CHECK-NEXT:    # fallthrough-return
879  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
880  %v2 = sext <8 x i8> %v to <8 x i16>
881  ret <8 x i16> %v2
882}
883
884define <8 x i16> @load_zext_v8i16_from_global_address() {
885; CHECK-LABEL: load_zext_v8i16_from_global_address:
886; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
887; CHECK-NEXT:  # %bb.0:
888; CHECK-NEXT:    i32.const 0
889; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
890; CHECK-NEXT:    # fallthrough-return
891  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
892  %v2 = zext <8 x i8> %v to <8 x i16>
893  ret <8 x i16> %v2
894}
895
896define <8 x i8> @load_ext_v8i16_from_global_address() {
897; CHECK-LABEL: load_ext_v8i16_from_global_address:
898; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
899; CHECK-NEXT:  # %bb.0:
900; CHECK-NEXT:    i32.const 0
901; CHECK-NEXT:    v128.load64_zero gv_v8i8
902; CHECK-NEXT:    # fallthrough-return
903  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
904  ret <8 x i8> %v
905}
906
907
908define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
909; CHECK-LABEL: store_v8i16:
910; CHECK:         .functype store_v8i16 (v128, i32) -> ()
911; CHECK-NEXT:  # %bb.0:
912; CHECK-NEXT:    local.get 1
913; CHECK-NEXT:    local.get 0
914; CHECK-NEXT:    v128.store 0
915; CHECK-NEXT:    # fallthrough-return
916  store <8 x i16> %v , <8 x i16>* %p
917  ret void
918}
919
920define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
921; CHECK-LABEL: store_narrowing_v8i16:
922; CHECK:         .functype store_narrowing_v8i16 (v128, i32) -> ()
923; CHECK-NEXT:  # %bb.0:
924; CHECK-NEXT:    local.get 1
925; CHECK-NEXT:    local.get 0
926; CHECK-NEXT:    v128.store64_lane 0, 0
927; CHECK-NEXT:    # fallthrough-return
928  store <8 x i8> %v, <8 x i8>* %p
929  ret void
930}
931
932define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
933; CHECK-LABEL: store_v8i16_with_folded_offset:
934; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
935; CHECK-NEXT:  # %bb.0:
936; CHECK-NEXT:    local.get 1
937; CHECK-NEXT:    local.get 0
938; CHECK-NEXT:    v128.store 16
939; CHECK-NEXT:    # fallthrough-return
940  %q = ptrtoint <8 x i16>* %p to i32
941  %r = add nuw i32 %q, 16
942  %s = inttoptr i32 %r to <8 x i16>*
943  store <8 x i16> %v , <8 x i16>* %s
944  ret void
945}
946
947define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
948; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
949; CHECK:         .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
950; CHECK-NEXT:  # %bb.0:
951; CHECK-NEXT:    local.get 1
952; CHECK-NEXT:    i32.const 16
953; CHECK-NEXT:    i32.add
954; CHECK-NEXT:    local.get 0
955; CHECK-NEXT:    v128.store64_lane 0, 0
956; CHECK-NEXT:    # fallthrough-return
957  %q = ptrtoint <8 x i8>* %p to i32
958  %r = add nuw i32 %q, 16
959  %s = inttoptr i32 %r to <8 x i8>*
960  store <8 x i8> %v , <8 x i8>* %s
961  ret void
962}
963
964define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
965; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
966; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
967; CHECK-NEXT:  # %bb.0:
968; CHECK-NEXT:    local.get 1
969; CHECK-NEXT:    local.get 0
970; CHECK-NEXT:    v128.store 16
971; CHECK-NEXT:    # fallthrough-return
972  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
973  store <8 x i16> %v , <8 x i16>* %s
974  ret void
975}
976
977define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
978; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
979; CHECK:         .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
980; CHECK-NEXT:  # %bb.0:
981; CHECK-NEXT:    local.get 1
982; CHECK-NEXT:    i32.const 8
983; CHECK-NEXT:    i32.add
984; CHECK-NEXT:    local.get 0
985; CHECK-NEXT:    v128.store64_lane 0, 0
986; CHECK-NEXT:    # fallthrough-return
987  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
988  store <8 x i8> %v , <8 x i8>* %s
989  ret void
990}
991
992define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
993; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
994; CHECK:         .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
995; CHECK-NEXT:  # %bb.0:
996; CHECK-NEXT:    local.get 1
997; CHECK-NEXT:    i32.const -16
998; CHECK-NEXT:    i32.add
999; CHECK-NEXT:    local.get 0
1000; CHECK-NEXT:    v128.store 0
1001; CHECK-NEXT:    # fallthrough-return
1002  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
1003  store <8 x i16> %v , <8 x i16>* %s
1004  ret void
1005}
1006
1007define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) {
1008; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
1009; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
1010; CHECK-NEXT:  # %bb.0:
1011; CHECK-NEXT:    local.get 1
1012; CHECK-NEXT:    i32.const -8
1013; CHECK-NEXT:    i32.add
1014; CHECK-NEXT:    local.get 0
1015; CHECK-NEXT:    v128.store64_lane 0, 0
1016; CHECK-NEXT:    # fallthrough-return
1017  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
1018  store <8 x i8> %v , <8 x i8>* %s
1019  ret void
1020}
1021
1022define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
1023; CHECK-LABEL: store_v8i16_with_unfolded_offset:
1024; CHECK:         .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
1025; CHECK-NEXT:  # %bb.0:
1026; CHECK-NEXT:    local.get 1
1027; CHECK-NEXT:    i32.const 16
1028; CHECK-NEXT:    i32.add
1029; CHECK-NEXT:    local.get 0
1030; CHECK-NEXT:    v128.store 0
1031; CHECK-NEXT:    # fallthrough-return
1032  %q = ptrtoint <8 x i16>* %p to i32
1033  %r = add nsw i32 %q, 16
1034  %s = inttoptr i32 %r to <8 x i16>*
1035  store <8 x i16> %v , <8 x i16>* %s
1036  ret void
1037}
1038
1039define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) {
1040; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
1041; CHECK:         .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
1042; CHECK-NEXT:  # %bb.0:
1043; CHECK-NEXT:    local.get 1
1044; CHECK-NEXT:    i32.const 16
1045; CHECK-NEXT:    i32.add
1046; CHECK-NEXT:    local.get 0
1047; CHECK-NEXT:    v128.store64_lane 0, 0
1048; CHECK-NEXT:    # fallthrough-return
1049  %q = ptrtoint <8 x i8>* %p to i32
1050  %r = add nsw i32 %q, 16
1051  %s = inttoptr i32 %r to <8 x i8>*
1052  store <8 x i8> %v , <8 x i8>* %s
1053  ret void
1054}
1055
1056define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
1057; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
1058; CHECK:         .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
1059; CHECK-NEXT:  # %bb.0:
1060; CHECK-NEXT:    local.get 1
1061; CHECK-NEXT:    i32.const 16
1062; CHECK-NEXT:    i32.add
1063; CHECK-NEXT:    local.get 0
1064; CHECK-NEXT:    v128.store 0
1065; CHECK-NEXT:    # fallthrough-return
1066  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
1067  store <8 x i16> %v , <8 x i16>* %s
1068  ret void
1069}
1070
1071define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
1072; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
1073; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
1074; CHECK-NEXT:  # %bb.0:
1075; CHECK-NEXT:    local.get 1
1076; CHECK-NEXT:    i32.const 8
1077; CHECK-NEXT:    i32.add
1078; CHECK-NEXT:    local.get 0
1079; CHECK-NEXT:    v128.store64_lane 0, 0
1080; CHECK-NEXT:    # fallthrough-return
1081  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
1082  store <8 x i8> %v , <8 x i8>* %s
1083  ret void
1084}
1085
1086define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
1087; CHECK-LABEL: store_v8i16_to_numeric_address:
1088; CHECK:         .functype store_v8i16_to_numeric_address (v128) -> ()
1089; CHECK-NEXT:  # %bb.0:
1090; CHECK-NEXT:    i32.const 0
1091; CHECK-NEXT:    local.get 0
1092; CHECK-NEXT:    v128.store 32
1093; CHECK-NEXT:    # fallthrough-return
1094  %s = inttoptr i32 32 to <8 x i16>*
1095  store <8 x i16> %v , <8 x i16>* %s
1096  ret void
1097}
1098
1099define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) {
1100; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
1101; CHECK:         .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
1102; CHECK-NEXT:  # %bb.0:
1103; CHECK-NEXT:    i32.const 32
1104; CHECK-NEXT:    local.get 0
1105; CHECK-NEXT:    v128.store64_lane 0, 0
1106; CHECK-NEXT:    # fallthrough-return
1107  %s = inttoptr i32 32 to <8 x i8>*
1108  store <8 x i8> %v , <8 x i8>* %s
1109  ret void
1110}
1111
1112define void @store_v8i16_to_global_address(<8 x i16> %v) {
1113; CHECK-LABEL: store_v8i16_to_global_address:
1114; CHECK:         .functype store_v8i16_to_global_address (v128) -> ()
1115; CHECK-NEXT:  # %bb.0:
1116; CHECK-NEXT:    i32.const 0
1117; CHECK-NEXT:    local.get 0
1118; CHECK-NEXT:    v128.store gv_v8i16
1119; CHECK-NEXT:    # fallthrough-return
1120  store <8 x i16> %v , <8 x i16>* @gv_v8i16
1121  ret void
1122}
1123
1124define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
1125; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
1126; CHECK:         .functype store_narrowing_v8i16_to_global_address (v128) -> ()
1127; CHECK-NEXT:  # %bb.0:
1128; CHECK-NEXT:    i32.const gv_v8i8
1129; CHECK-NEXT:    local.get 0
1130; CHECK-NEXT:    v128.store64_lane 0, 0
1131; CHECK-NEXT:    # fallthrough-return
1132  store <8 x i8> %v , <8 x i8>* @gv_v8i8
1133  ret void
1134}
1135
1136; ==============================================================================
1137; 4 x i32
1138; ==============================================================================
1139define <4 x i32> @load_v4i32(<4 x i32>* %p) {
1140; CHECK-LABEL: load_v4i32:
1141; CHECK:         .functype load_v4i32 (i32) -> (v128)
1142; CHECK-NEXT:  # %bb.0:
1143; CHECK-NEXT:    local.get 0
1144; CHECK-NEXT:    v128.load 0
1145; CHECK-NEXT:    # fallthrough-return
1146  %v = load <4 x i32>, <4 x i32>* %p
1147  ret <4 x i32> %v
1148}
1149
1150define <4 x i32> @load_splat_v4i32(i32* %addr) {
1151; CHECK-LABEL: load_splat_v4i32:
1152; CHECK:         .functype load_splat_v4i32 (i32) -> (v128)
1153; CHECK-NEXT:  # %bb.0:
1154; CHECK-NEXT:    local.get 0
1155; CHECK-NEXT:    v128.load32_splat 0
1156; CHECK-NEXT:    # fallthrough-return
1157  %e = load i32, i32* %addr, align 4
1158  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1159  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1160  ret <4 x i32> %v2
1161}
1162
1163define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
1164; CHECK-LABEL: load_sext_v4i32:
1165; CHECK:         .functype load_sext_v4i32 (i32) -> (v128)
1166; CHECK-NEXT:  # %bb.0:
1167; CHECK-NEXT:    local.get 0
1168; CHECK-NEXT:    i32x4.load16x4_s 0
1169; CHECK-NEXT:    # fallthrough-return
1170  %v = load <4 x i16>, <4 x i16>* %p
1171  %v2 = sext <4 x i16> %v to <4 x i32>
1172  ret <4 x i32> %v2
1173}
1174
1175define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
1176; CHECK-LABEL: load_zext_v4i32:
1177; CHECK:         .functype load_zext_v4i32 (i32) -> (v128)
1178; CHECK-NEXT:  # %bb.0:
1179; CHECK-NEXT:    local.get 0
1180; CHECK-NEXT:    i32x4.load16x4_u 0
1181; CHECK-NEXT:    # fallthrough-return
1182  %v = load <4 x i16>, <4 x i16>* %p
1183  %v2 = zext <4 x i16> %v to <4 x i32>
1184  ret <4 x i32> %v2
1185}
1186
1187define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
1188; CHECK-LABEL: load_ext_v4i32:
1189; CHECK:         .functype load_ext_v4i32 (i32) -> (v128)
1190; CHECK-NEXT:  # %bb.0:
1191; CHECK-NEXT:    local.get 0
1192; CHECK-NEXT:    v128.load64_zero 0
1193; CHECK-NEXT:    # fallthrough-return
1194  %v = load <4 x i16>, <4 x i16>* %p
1195  ret <4 x i16> %v
1196}
1197
1198define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
1199; CHECK-LABEL: load_v4i32_with_folded_offset:
1200; CHECK:         .functype load_v4i32_with_folded_offset (i32) -> (v128)
1201; CHECK-NEXT:  # %bb.0:
1202; CHECK-NEXT:    local.get 0
1203; CHECK-NEXT:    v128.load 16
1204; CHECK-NEXT:    # fallthrough-return
1205  %q = ptrtoint <4 x i32>* %p to i32
1206  %r = add nuw i32 %q, 16
1207  %s = inttoptr i32 %r to <4 x i32>*
1208  %v = load <4 x i32>, <4 x i32>* %s
1209  ret <4 x i32> %v
1210}
1211
1212define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
1213; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
1214; CHECK:         .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
1215; CHECK-NEXT:  # %bb.0:
1216; CHECK-NEXT:    local.get 0
1217; CHECK-NEXT:    v128.load32_splat 16
1218; CHECK-NEXT:    # fallthrough-return
1219  %q = ptrtoint i32* %p to i32
1220  %r = add nuw i32 %q, 16
1221  %s = inttoptr i32 %r to i32*
1222  %e = load i32, i32* %s
1223  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1224  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1225  ret <4 x i32> %v2
1226}
1227
1228define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
1229; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
1230; CHECK:         .functype load_sext_v4i32_with_folded_offset (i32) -> (v128)
1231; CHECK-NEXT:  # %bb.0:
1232; CHECK-NEXT:    local.get 0
1233; CHECK-NEXT:    i32x4.load16x4_s 16
1234; CHECK-NEXT:    # fallthrough-return
1235  %q = ptrtoint <4 x i16>* %p to i32
1236  %r = add nuw i32 %q, 16
1237  %s = inttoptr i32 %r to <4 x i16>*
1238  %v = load <4 x i16>, <4 x i16>* %s
1239  %v2 = sext <4 x i16> %v to <4 x i32>
1240  ret <4 x i32> %v2
1241}
1242
1243define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
1244; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
1245; CHECK:         .functype load_zext_v4i32_with_folded_offset (i32) -> (v128)
1246; CHECK-NEXT:  # %bb.0:
1247; CHECK-NEXT:    local.get 0
1248; CHECK-NEXT:    i32x4.load16x4_u 16
1249; CHECK-NEXT:    # fallthrough-return
1250  %q = ptrtoint <4 x i16>* %p to i32
1251  %r = add nuw i32 %q, 16
1252  %s = inttoptr i32 %r to <4 x i16>*
1253  %v = load <4 x i16>, <4 x i16>* %s
1254  %v2 = zext <4 x i16> %v to <4 x i32>
1255  ret <4 x i32> %v2
1256}
1257
1258define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
1259; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
1260; CHECK:         .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
1261; CHECK-NEXT:  # %bb.0:
1262; CHECK-NEXT:    local.get 0
1263; CHECK-NEXT:    v128.load64_zero 16
1264; CHECK-NEXT:    # fallthrough-return
1265  %q = ptrtoint <4 x i16>* %p to i32
1266  %r = add nuw i32 %q, 16
1267  %s = inttoptr i32 %r to <4 x i16>*
1268  %v = load <4 x i16>, <4 x i16>* %s
1269  ret <4 x i16> %v
1270}
1271
1272define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
1273; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
1274; CHECK:         .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
1275; CHECK-NEXT:  # %bb.0:
1276; CHECK-NEXT:    local.get 0
1277; CHECK-NEXT:    v128.load 16
1278; CHECK-NEXT:    # fallthrough-return
1279  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
1280  %v = load <4 x i32>, <4 x i32>* %s
1281  ret <4 x i32> %v
1282}
1283
1284define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
1285; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
1286; CHECK:         .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
1287; CHECK-NEXT:  # %bb.0:
1288; CHECK-NEXT:    local.get 0
1289; CHECK-NEXT:    v128.load32_splat 4
1290; CHECK-NEXT:    # fallthrough-return
1291  %s = getelementptr inbounds i32, i32* %p, i32 1
1292  %e = load i32, i32* %s
1293  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1294  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1295  ret <4 x i32> %v2
1296}
1297
1298define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1299; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
1300; CHECK:         .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128)
1301; CHECK-NEXT:  # %bb.0:
1302; CHECK-NEXT:    local.get 0
1303; CHECK-NEXT:    i32x4.load16x4_s 8
1304; CHECK-NEXT:    # fallthrough-return
1305  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1306  %v = load <4 x i16>, <4 x i16>* %s
1307  %v2 = sext <4 x i16> %v to <4 x i32>
1308  ret <4 x i32> %v2
1309}
1310
1311define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1312; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
1313; CHECK:         .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128)
1314; CHECK-NEXT:  # %bb.0:
1315; CHECK-NEXT:    local.get 0
1316; CHECK-NEXT:    i32x4.load16x4_u 8
1317; CHECK-NEXT:    # fallthrough-return
1318  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1319  %v = load <4 x i16>, <4 x i16>* %s
1320  %v2 = zext <4 x i16> %v to <4 x i32>
1321  ret <4 x i32> %v2
1322}
1323
1324define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1325; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
1326; CHECK:         .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
1327; CHECK-NEXT:  # %bb.0:
1328; CHECK-NEXT:    local.get 0
1329; CHECK-NEXT:    v128.load64_zero 8
1330; CHECK-NEXT:    # fallthrough-return
1331  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1332  %v = load <4 x i16>, <4 x i16>* %s
1333  ret <4 x i16> %v
1334}
1335
1336define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
1337; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
1338; CHECK:         .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1339; CHECK-NEXT:  # %bb.0:
1340; CHECK-NEXT:    local.get 0
1341; CHECK-NEXT:    i32.const -16
1342; CHECK-NEXT:    i32.add
1343; CHECK-NEXT:    v128.load 0
1344; CHECK-NEXT:    # fallthrough-return
1345  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1346  %v = load <4 x i32>, <4 x i32>* %s
1347  ret <4 x i32> %v
1348}
1349
1350define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
1351; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
1352; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1353; CHECK-NEXT:  # %bb.0:
1354; CHECK-NEXT:    local.get 0
1355; CHECK-NEXT:    i32.const -4
1356; CHECK-NEXT:    i32.add
1357; CHECK-NEXT:    v128.load32_splat 0
1358; CHECK-NEXT:    # fallthrough-return
1359  %s = getelementptr inbounds i32, i32* %p, i32 -1
1360  %e = load i32, i32* %s
1361  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1362  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1363  ret <4 x i32> %v2
1364}
1365
1366define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1367; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
1368; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1369; CHECK-NEXT:  # %bb.0:
1370; CHECK-NEXT:    local.get 0
1371; CHECK-NEXT:    i32.const -8
1372; CHECK-NEXT:    i32.add
1373; CHECK-NEXT:    i32x4.load16x4_s 0
1374; CHECK-NEXT:    # fallthrough-return
1375  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1376  %v = load <4 x i16>, <4 x i16>* %s
1377  %v2 = sext <4 x i16> %v to <4 x i32>
1378  ret <4 x i32> %v2
1379}
1380
1381define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1382; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
1383; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1384; CHECK-NEXT:  # %bb.0:
1385; CHECK-NEXT:    local.get 0
1386; CHECK-NEXT:    i32.const -8
1387; CHECK-NEXT:    i32.add
1388; CHECK-NEXT:    i32x4.load16x4_u 0
1389; CHECK-NEXT:    # fallthrough-return
1390  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1391  %v = load <4 x i16>, <4 x i16>* %s
1392  %v2 = zext <4 x i16> %v to <4 x i32>
1393  ret <4 x i32> %v2
1394}
1395
1396define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1397; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
1398; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1399; CHECK-NEXT:  # %bb.0:
1400; CHECK-NEXT:    local.get 0
1401; CHECK-NEXT:    i32.const -8
1402; CHECK-NEXT:    i32.add
1403; CHECK-NEXT:    v128.load64_zero 0
1404; CHECK-NEXT:    # fallthrough-return
1405  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1406  %v = load <4 x i16>, <4 x i16>* %s
1407  ret <4 x i16> %v
1408}
1409
1410define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
1411; CHECK-LABEL: load_v4i32_with_unfolded_offset:
1412; CHECK:         .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
1413; CHECK-NEXT:  # %bb.0:
1414; CHECK-NEXT:    local.get 0
1415; CHECK-NEXT:    i32.const 16
1416; CHECK-NEXT:    i32.add
1417; CHECK-NEXT:    v128.load 0
1418; CHECK-NEXT:    # fallthrough-return
1419  %q = ptrtoint <4 x i32>* %p to i32
1420  %r = add nsw i32 %q, 16
1421  %s = inttoptr i32 %r to <4 x i32>*
1422  %v = load <4 x i32>, <4 x i32>* %s
1423  ret <4 x i32> %v
1424}
1425
1426define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
1427; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
1428; CHECK:         .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
1429; CHECK-NEXT:  # %bb.0:
1430; CHECK-NEXT:    local.get 0
1431; CHECK-NEXT:    i32.const 16
1432; CHECK-NEXT:    i32.add
1433; CHECK-NEXT:    v128.load32_splat 0
1434; CHECK-NEXT:    # fallthrough-return
1435  %q = ptrtoint i32* %p to i32
1436  %r = add nsw i32 %q, 16
1437  %s = inttoptr i32 %r to i32*
1438  %e = load i32, i32* %s
1439  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1440  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1441  ret <4 x i32> %v2
1442}
1443
1444define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1445; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
1446; CHECK:         .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128)
1447; CHECK-NEXT:  # %bb.0:
1448; CHECK-NEXT:    local.get 0
1449; CHECK-NEXT:    i32.const 16
1450; CHECK-NEXT:    i32.add
1451; CHECK-NEXT:    i32x4.load16x4_s 0
1452; CHECK-NEXT:    # fallthrough-return
1453  %q = ptrtoint <4 x i16>* %p to i32
1454  %r = add nsw i32 %q, 16
1455  %s = inttoptr i32 %r to <4 x i16>*
1456  %v = load <4 x i16>, <4 x i16>* %s
1457  %v2 = sext <4 x i16> %v to <4 x i32>
1458  ret <4 x i32> %v2
1459}
1460
1461define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1462; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
1463; CHECK:         .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128)
1464; CHECK-NEXT:  # %bb.0:
1465; CHECK-NEXT:    local.get 0
1466; CHECK-NEXT:    i32.const 16
1467; CHECK-NEXT:    i32.add
1468; CHECK-NEXT:    i32x4.load16x4_u 0
1469; CHECK-NEXT:    # fallthrough-return
1470  %q = ptrtoint <4 x i16>* %p to i32
1471  %r = add nsw i32 %q, 16
1472  %s = inttoptr i32 %r to <4 x i16>*
1473  %v = load <4 x i16>, <4 x i16>* %s
1474  %v2 = zext <4 x i16> %v to <4 x i32>
1475  ret <4 x i32> %v2
1476}
1477
1478define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1479; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
1480; CHECK:         .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
1481; CHECK-NEXT:  # %bb.0:
1482; CHECK-NEXT:    local.get 0
1483; CHECK-NEXT:    i32.const 16
1484; CHECK-NEXT:    i32.add
1485; CHECK-NEXT:    v128.load64_zero 0
1486; CHECK-NEXT:    # fallthrough-return
1487  %q = ptrtoint <4 x i16>* %p to i32
1488  %r = add nsw i32 %q, 16
1489  %s = inttoptr i32 %r to <4 x i16>*
1490  %v = load <4 x i16>, <4 x i16>* %s
1491  ret <4 x i16> %v
1492}
1493
1494define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
1495; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
1496; CHECK:         .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1497; CHECK-NEXT:  # %bb.0:
1498; CHECK-NEXT:    local.get 0
1499; CHECK-NEXT:    i32.const 16
1500; CHECK-NEXT:    i32.add
1501; CHECK-NEXT:    v128.load 0
1502; CHECK-NEXT:    # fallthrough-return
1503  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
1504  %v = load <4 x i32>, <4 x i32>* %s
1505  ret <4 x i32> %v
1506}
1507
1508define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
1509; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
1510; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1511; CHECK-NEXT:  # %bb.0:
1512; CHECK-NEXT:    local.get 0
1513; CHECK-NEXT:    i32.const 4
1514; CHECK-NEXT:    i32.add
1515; CHECK-NEXT:    v128.load32_splat 0
1516; CHECK-NEXT:    # fallthrough-return
1517  %s = getelementptr i32, i32* %p, i32 1
1518  %e = load i32, i32* %s
1519  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1520  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1521  ret <4 x i32> %v2
1522}
1523
1524define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1525; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
1526; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1527; CHECK-NEXT:  # %bb.0:
1528; CHECK-NEXT:    local.get 0
1529; CHECK-NEXT:    i32.const 8
1530; CHECK-NEXT:    i32.add
1531; CHECK-NEXT:    i32x4.load16x4_s 0
1532; CHECK-NEXT:    # fallthrough-return
1533  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1534  %v = load <4 x i16>, <4 x i16>* %s
1535  %v2 = sext <4 x i16> %v to <4 x i32>
1536  ret <4 x i32> %v2
1537}
1538
1539define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1540; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
1541; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1542; CHECK-NEXT:  # %bb.0:
1543; CHECK-NEXT:    local.get 0
1544; CHECK-NEXT:    i32.const 8
1545; CHECK-NEXT:    i32.add
1546; CHECK-NEXT:    i32x4.load16x4_u 0
1547; CHECK-NEXT:    # fallthrough-return
1548  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1549  %v = load <4 x i16>, <4 x i16>* %s
1550  %v2 = zext <4 x i16> %v to <4 x i32>
1551  ret <4 x i32> %v2
1552}
1553
1554define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1555; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
1556; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1557; CHECK-NEXT:  # %bb.0:
1558; CHECK-NEXT:    local.get 0
1559; CHECK-NEXT:    i32.const 8
1560; CHECK-NEXT:    i32.add
1561; CHECK-NEXT:    v128.load64_zero 0
1562; CHECK-NEXT:    # fallthrough-return
1563  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1564  %v = load <4 x i16>, <4 x i16>* %s
1565  ret <4 x i16> %v
1566}
1567
1568define <4 x i32> @load_v4i32_from_numeric_address() {
1569; CHECK-LABEL: load_v4i32_from_numeric_address:
1570; CHECK:         .functype load_v4i32_from_numeric_address () -> (v128)
1571; CHECK-NEXT:  # %bb.0:
1572; CHECK-NEXT:    i32.const 0
1573; CHECK-NEXT:    v128.load 32
1574; CHECK-NEXT:    # fallthrough-return
1575  %s = inttoptr i32 32 to <4 x i32>*
1576  %v = load <4 x i32>, <4 x i32>* %s
1577  ret <4 x i32> %v
1578}
1579
1580define <4 x i32> @load_splat_v4i32_from_numeric_address() {
1581; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
1582; CHECK:         .functype load_splat_v4i32_from_numeric_address () -> (v128)
1583; CHECK-NEXT:  # %bb.0:
1584; CHECK-NEXT:    i32.const 0
1585; CHECK-NEXT:    v128.load32_splat 32
1586; CHECK-NEXT:    # fallthrough-return
1587  %s = inttoptr i32 32 to i32*
1588  %e = load i32, i32* %s
1589  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1590  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1591  ret <4 x i32> %v2
1592}
1593
1594define <4 x i32> @load_sext_v4i32_from_numeric_address() {
1595; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
1596; CHECK:         .functype load_sext_v4i32_from_numeric_address () -> (v128)
1597; CHECK-NEXT:  # %bb.0:
1598; CHECK-NEXT:    i32.const 0
1599; CHECK-NEXT:    i32x4.load16x4_s 32
1600; CHECK-NEXT:    # fallthrough-return
1601  %s = inttoptr i32 32 to <4 x i16>*
1602  %v = load <4 x i16>, <4 x i16>* %s
1603  %v2 = sext <4 x i16> %v to <4 x i32>
1604  ret <4 x i32> %v2
1605}
1606
1607define <4 x i32> @load_zext_v4i32_from_numeric_address() {
1608; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
1609; CHECK:         .functype load_zext_v4i32_from_numeric_address () -> (v128)
1610; CHECK-NEXT:  # %bb.0:
1611; CHECK-NEXT:    i32.const 0
1612; CHECK-NEXT:    i32x4.load16x4_u 32
1613; CHECK-NEXT:    # fallthrough-return
1614  %s = inttoptr i32 32 to <4 x i16>*
1615  %v = load <4 x i16>, <4 x i16>* %s
1616  %v2 = zext <4 x i16> %v to <4 x i32>
1617  ret <4 x i32> %v2
1618}
1619
1620define <4 x i16> @load_ext_v4i32_from_numeric_address() {
1621; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
1622; CHECK:         .functype load_ext_v4i32_from_numeric_address () -> (v128)
1623; CHECK-NEXT:  # %bb.0:
1624; CHECK-NEXT:    i32.const 0
1625; CHECK-NEXT:    v128.load64_zero 32
1626; CHECK-NEXT:    # fallthrough-return
1627  %s = inttoptr i32 32 to <4 x i16>*
1628  %v = load <4 x i16>, <4 x i16>* %s
1629  ret <4 x i16> %v
1630}
1631
1632@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
1633define <4 x i32> @load_v4i32_from_global_address() {
1634; CHECK-LABEL: load_v4i32_from_global_address:
1635; CHECK:         .functype load_v4i32_from_global_address () -> (v128)
1636; CHECK-NEXT:  # %bb.0:
1637; CHECK-NEXT:    i32.const 0
1638; CHECK-NEXT:    v128.load gv_v4i32
1639; CHECK-NEXT:    # fallthrough-return
1640  %v = load <4 x i32>, <4 x i32>* @gv_v4i32
1641  ret <4 x i32> %v
1642}
1643
1644@gv_i32 = global i32 42
1645define <4 x i32> @load_splat_v4i32_from_global_address() {
1646; CHECK-LABEL: load_splat_v4i32_from_global_address:
1647; CHECK:         .functype load_splat_v4i32_from_global_address () -> (v128)
1648; CHECK-NEXT:  # %bb.0:
1649; CHECK-NEXT:    i32.const 0
1650; CHECK-NEXT:    v128.load32_splat gv_i32
1651; CHECK-NEXT:    # fallthrough-return
1652  %e = load i32, i32* @gv_i32
1653  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1654  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1655  ret <4 x i32> %v2
1656}
1657
1658@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
1659define <4 x i32> @load_sext_v4i32_from_global_address() {
1660; CHECK-LABEL: load_sext_v4i32_from_global_address:
1661; CHECK:         .functype load_sext_v4i32_from_global_address () -> (v128)
1662; CHECK-NEXT:  # %bb.0:
1663; CHECK-NEXT:    i32.const 0
1664; CHECK-NEXT:    i32x4.load16x4_s gv_v4i16
1665; CHECK-NEXT:    # fallthrough-return
1666  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1667  %v2 = sext <4 x i16> %v to <4 x i32>
1668  ret <4 x i32> %v2
1669}
1670
1671define <4 x i32> @load_zext_v4i32_from_global_address() {
1672; CHECK-LABEL: load_zext_v4i32_from_global_address:
1673; CHECK:         .functype load_zext_v4i32_from_global_address () -> (v128)
1674; CHECK-NEXT:  # %bb.0:
1675; CHECK-NEXT:    i32.const 0
1676; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
1677; CHECK-NEXT:    # fallthrough-return
1678  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1679  %v2 = zext <4 x i16> %v to <4 x i32>
1680  ret <4 x i32> %v2
1681}
1682
1683define <4 x i16> @load_ext_v4i32_from_global_address() {
1684; CHECK-LABEL: load_ext_v4i32_from_global_address:
1685; CHECK:         .functype load_ext_v4i32_from_global_address () -> (v128)
1686; CHECK-NEXT:  # %bb.0:
1687; CHECK-NEXT:    i32.const 0
1688; CHECK-NEXT:    v128.load64_zero gv_v4i16
1689; CHECK-NEXT:    # fallthrough-return
1690  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1691  ret <4 x i16> %v
1692}
1693
1694define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
1695; CHECK-LABEL: store_v4i32:
1696; CHECK:         .functype store_v4i32 (v128, i32) -> ()
1697; CHECK-NEXT:  # %bb.0:
1698; CHECK-NEXT:    local.get 1
1699; CHECK-NEXT:    local.get 0
1700; CHECK-NEXT:    v128.store 0
1701; CHECK-NEXT:    # fallthrough-return
1702  store <4 x i32> %v , <4 x i32>* %p
1703  ret void
1704}
1705
1706define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
1707; CHECK-LABEL: store_narrowing_v4i32:
1708; CHECK:         .functype store_narrowing_v4i32 (v128, i32) -> ()
1709; CHECK-NEXT:  # %bb.0:
1710; CHECK-NEXT:    local.get 1
1711; CHECK-NEXT:    local.get 0
1712; CHECK-NEXT:    v128.store64_lane 0, 0
1713; CHECK-NEXT:    # fallthrough-return
1714  store <4 x i16> %v , <4 x i16>* %p
1715  ret void
1716}
1717
1718define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
1719; CHECK-LABEL: store_v4i32_with_folded_offset:
1720; CHECK:         .functype store_v4i32_with_folded_offset (v128, i32) -> ()
1721; CHECK-NEXT:  # %bb.0:
1722; CHECK-NEXT:    local.get 1
1723; CHECK-NEXT:    local.get 0
1724; CHECK-NEXT:    v128.store 16
1725; CHECK-NEXT:    # fallthrough-return
1726  %q = ptrtoint <4 x i32>* %p to i32
1727  %r = add nuw i32 %q, 16
1728  %s = inttoptr i32 %r to <4 x i32>*
1729  store <4 x i32> %v , <4 x i32>* %s
1730  ret void
1731}
1732
1733define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) {
1734; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset:
1735; CHECK:         .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
1736; CHECK-NEXT:  # %bb.0:
1737; CHECK-NEXT:    local.get 1
1738; CHECK-NEXT:    i32.const 16
1739; CHECK-NEXT:    i32.add
1740; CHECK-NEXT:    local.get 0
1741; CHECK-NEXT:    v128.store64_lane 0, 0
1742; CHECK-NEXT:    # fallthrough-return
1743  %q = ptrtoint <4 x i16>* %p to i32
1744  %r = add nuw i32 %q, 16
1745  %s = inttoptr i32 %r to <4 x i16>*
1746  store <4 x i16> %v , <4 x i16>* %s
1747  ret void
1748}
1749
1750define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1751; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
1752; CHECK:         .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
1753; CHECK-NEXT:  # %bb.0:
1754; CHECK-NEXT:    local.get 1
1755; CHECK-NEXT:    local.get 0
1756; CHECK-NEXT:    v128.store 16
1757; CHECK-NEXT:    # fallthrough-return
1758  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
1759  store <4 x i32> %v , <4 x i32>* %s
1760  ret void
1761}
1762
1763define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
1764; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset:
1765; CHECK:         .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
1766; CHECK-NEXT:  # %bb.0:
1767; CHECK-NEXT:    local.get 1
1768; CHECK-NEXT:    i32.const 8
1769; CHECK-NEXT:    i32.add
1770; CHECK-NEXT:    local.get 0
1771; CHECK-NEXT:    v128.store64_lane 0, 0
1772; CHECK-NEXT:    # fallthrough-return
1773  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1774  store <4 x i16> %v , <4 x i16>* %s
1775  ret void
1776}
1777
1778define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
1779; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
1780; CHECK:         .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
1781; CHECK-NEXT:  # %bb.0:
1782; CHECK-NEXT:    local.get 1
1783; CHECK-NEXT:    i32.const -16
1784; CHECK-NEXT:    i32.add
1785; CHECK-NEXT:    local.get 0
1786; CHECK-NEXT:    v128.store 0
1787; CHECK-NEXT:    # fallthrough-return
1788  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1789  store <4 x i32> %v , <4 x i32>* %s
1790  ret void
1791}
1792
1793define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) {
1794; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset:
1795; CHECK:         .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
1796; CHECK-NEXT:  # %bb.0:
1797; CHECK-NEXT:    local.get 1
1798; CHECK-NEXT:    i32.const -8
1799; CHECK-NEXT:    i32.add
1800; CHECK-NEXT:    local.get 0
1801; CHECK-NEXT:    v128.store64_lane 0, 0
1802; CHECK-NEXT:    # fallthrough-return
1803  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1804  store <4 x i16> %v , <4 x i16>* %s
1805  ret void
1806}
1807
1808define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
1809; CHECK-LABEL: store_v4i32_with_unfolded_offset:
1810; CHECK:         .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
1811; CHECK-NEXT:  # %bb.0:
1812; CHECK-NEXT:    local.get 1
1813; CHECK-NEXT:    i32.const 16
1814; CHECK-NEXT:    i32.add
1815; CHECK-NEXT:    local.get 0
1816; CHECK-NEXT:    v128.store 0
1817; CHECK-NEXT:    # fallthrough-return
1818  %q = ptrtoint <4 x i32>* %p to i32
1819  %r = add nsw i32 %q, 16
1820  %s = inttoptr i32 %r to <4 x i32>*
1821  store <4 x i32> %v , <4 x i32>* %s
1822  ret void
1823}
1824
1825define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) {
1826; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset:
1827; CHECK:         .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> ()
1828; CHECK-NEXT:  # %bb.0:
1829; CHECK-NEXT:    local.get 1
1830; CHECK-NEXT:    i32.const 16
1831; CHECK-NEXT:    i32.add
1832; CHECK-NEXT:    local.get 0
1833; CHECK-NEXT:    v128.store64_lane 0, 0
1834; CHECK-NEXT:    # fallthrough-return
1835  %q = ptrtoint <4 x i16>* %p to i32
1836  %r = add nsw i32 %q, 16
1837  %s = inttoptr i32 %r to <4 x i16>*
1838  store <4 x i16> %v , <4 x i16>* %s
1839  ret void
1840}
1841
1842define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1843; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
1844; CHECK:         .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
1845; CHECK-NEXT:  # %bb.0:
1846; CHECK-NEXT:    local.get 1
1847; CHECK-NEXT:    i32.const 16
1848; CHECK-NEXT:    i32.add
1849; CHECK-NEXT:    local.get 0
1850; CHECK-NEXT:    v128.store 0
1851; CHECK-NEXT:    # fallthrough-return
1852  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
1853  store <4 x i32> %v , <4 x i32>* %s
1854  ret void
1855}
1856
1857define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
1858; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset:
1859; CHECK:         .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
1860; CHECK-NEXT:  # %bb.0:
1861; CHECK-NEXT:    local.get 1
1862; CHECK-NEXT:    i32.const 8
1863; CHECK-NEXT:    i32.add
1864; CHECK-NEXT:    local.get 0
1865; CHECK-NEXT:    v128.store64_lane 0, 0
1866; CHECK-NEXT:    # fallthrough-return
1867  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1868  store <4 x i16> %v , <4 x i16>* %s
1869  ret void
1870}
1871
1872define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
1873; CHECK-LABEL: store_v4i32_to_numeric_address:
1874; CHECK:         .functype store_v4i32_to_numeric_address (v128) -> ()
1875; CHECK-NEXT:  # %bb.0:
1876; CHECK-NEXT:    i32.const 0
1877; CHECK-NEXT:    local.get 0
1878; CHECK-NEXT:    v128.store 32
1879; CHECK-NEXT:    # fallthrough-return
1880  %s = inttoptr i32 32 to <4 x i32>*
1881  store <4 x i32> %v , <4 x i32>* %s
1882  ret void
1883}
1884
1885define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
1886; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
1887; CHECK:         .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
1888; CHECK-NEXT:  # %bb.0:
1889; CHECK-NEXT:    i32.const 32
1890; CHECK-NEXT:    local.get 0
1891; CHECK-NEXT:    v128.store64_lane 0, 0
1892; CHECK-NEXT:    # fallthrough-return
1893  %s = inttoptr i32 32 to <4 x i16>*
1894  store <4 x i16> %v , <4 x i16>* %s
1895  ret void
1896}
1897
1898define void @store_v4i32_to_global_address(<4 x i32> %v) {
1899; CHECK-LABEL: store_v4i32_to_global_address:
1900; CHECK:         .functype store_v4i32_to_global_address (v128) -> ()
1901; CHECK-NEXT:  # %bb.0:
1902; CHECK-NEXT:    i32.const 0
1903; CHECK-NEXT:    local.get 0
1904; CHECK-NEXT:    v128.store gv_v4i32
1905; CHECK-NEXT:    # fallthrough-return
1906  store <4 x i32> %v , <4 x i32>* @gv_v4i32
1907  ret void
1908}
1909
1910define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
1911; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
1912; CHECK:         .functype store_narrowing_v4i32_to_global_address (v128) -> ()
1913; CHECK-NEXT:  # %bb.0:
1914; CHECK-NEXT:    i32.const gv_v4i16
1915; CHECK-NEXT:    local.get 0
1916; CHECK-NEXT:    v128.store64_lane 0, 0
1917; CHECK-NEXT:    # fallthrough-return
1918  store <4 x i16> %v , <4 x i16>* @gv_v4i16
1919  ret void
1920}
1921
1922; ==============================================================================
1923; 2 x i64
1924; ==============================================================================
1925define <2 x i64> @load_v2i64(<2 x i64>* %p) {
1926; CHECK-LABEL: load_v2i64:
1927; CHECK:         .functype load_v2i64 (i32) -> (v128)
1928; CHECK-NEXT:  # %bb.0:
1929; CHECK-NEXT:    local.get 0
1930; CHECK-NEXT:    v128.load 0
1931; CHECK-NEXT:    # fallthrough-return
1932  %v = load <2 x i64>, <2 x i64>* %p
1933  ret <2 x i64> %v
1934}
1935
1936define <2 x i64> @load_splat_v2i64(i64* %p) {
1937; CHECK-LABEL: load_splat_v2i64:
1938; CHECK:         .functype load_splat_v2i64 (i32) -> (v128)
1939; CHECK-NEXT:  # %bb.0:
1940; CHECK-NEXT:    local.get 0
1941; CHECK-NEXT:    v128.load64_splat 0
1942; CHECK-NEXT:    # fallthrough-return
1943  %e = load i64, i64* %p
1944  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1945  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1946  ret <2 x i64> %v2
1947}
1948
1949define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
1950; CHECK-LABEL: load_sext_v2i64:
1951; CHECK:         .functype load_sext_v2i64 (i32) -> (v128)
1952; CHECK-NEXT:  # %bb.0:
1953; CHECK-NEXT:    local.get 0
1954; CHECK-NEXT:    i64x2.load32x2_s 0
1955; CHECK-NEXT:    # fallthrough-return
1956  %v = load <2 x i32>, <2 x i32>* %p
1957  %v2 = sext <2 x i32> %v to <2 x i64>
1958  ret <2 x i64> %v2
1959}
1960
1961define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
1962; CHECK-LABEL: load_zext_v2i64:
1963; CHECK:         .functype load_zext_v2i64 (i32) -> (v128)
1964; CHECK-NEXT:  # %bb.0:
1965; CHECK-NEXT:    local.get 0
1966; CHECK-NEXT:    i64x2.load32x2_u 0
1967; CHECK-NEXT:    # fallthrough-return
1968  %v = load <2 x i32>, <2 x i32>* %p
1969  %v2 = zext <2 x i32> %v to <2 x i64>
1970  ret <2 x i64> %v2
1971}
1972
1973define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
1974; CHECK-LABEL: load_ext_v2i64:
1975; CHECK:         .functype load_ext_v2i64 (i32) -> (v128)
1976; CHECK-NEXT:  # %bb.0:
1977; CHECK-NEXT:    local.get 0
1978; CHECK-NEXT:    v128.load64_zero 0
1979; CHECK-NEXT:    # fallthrough-return
1980  %v = load <2 x i32>, <2 x i32>* %p
1981  ret <2 x i32> %v
1982}
1983
1984define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
1985; CHECK-LABEL: load_v2i64_with_folded_offset:
1986; CHECK:         .functype load_v2i64_with_folded_offset (i32) -> (v128)
1987; CHECK-NEXT:  # %bb.0:
1988; CHECK-NEXT:    local.get 0
1989; CHECK-NEXT:    v128.load 16
1990; CHECK-NEXT:    # fallthrough-return
1991  %q = ptrtoint <2 x i64>* %p to i32
1992  %r = add nuw i32 %q, 16
1993  %s = inttoptr i32 %r to <2 x i64>*
1994  %v = load <2 x i64>, <2 x i64>* %s
1995  ret <2 x i64> %v
1996}
1997
1998define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
1999; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
2000; CHECK:         .functype load_splat_v2i64_with_folded_offset (i32) -> (v128)
2001; CHECK-NEXT:  # %bb.0:
2002; CHECK-NEXT:    local.get 0
2003; CHECK-NEXT:    v128.load64_splat 16
2004; CHECK-NEXT:    # fallthrough-return
2005  %q = ptrtoint i64* %p to i32
2006  %r = add nuw i32 %q, 16
2007  %s = inttoptr i32 %r to i64*
2008  %e = load i64, i64* %s
2009  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2010  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2011  ret <2 x i64> %v2
2012}
2013
2014define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
2015; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
2016; CHECK:         .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
2017; CHECK-NEXT:  # %bb.0:
2018; CHECK-NEXT:    local.get 0
2019; CHECK-NEXT:    i64x2.load32x2_s 16
2020; CHECK-NEXT:    # fallthrough-return
2021  %q = ptrtoint <2 x i32>* %p to i32
2022  %r = add nuw i32 %q, 16
2023  %s = inttoptr i32 %r to <2 x i32>*
2024  %v = load <2 x i32>, <2 x i32>* %s
2025  %v2 = sext <2 x i32> %v to <2 x i64>
2026  ret <2 x i64> %v2
2027}
2028
2029define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
2030; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
2031; CHECK:         .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
2032; CHECK-NEXT:  # %bb.0:
2033; CHECK-NEXT:    local.get 0
2034; CHECK-NEXT:    i64x2.load32x2_u 16
2035; CHECK-NEXT:    # fallthrough-return
2036  %q = ptrtoint <2 x i32>* %p to i32
2037  %r = add nuw i32 %q, 16
2038  %s = inttoptr i32 %r to <2 x i32>*
2039  %v = load <2 x i32>, <2 x i32>* %s
2040  %v2 = zext <2 x i32> %v to <2 x i64>
2041  ret <2 x i64> %v2
2042}
2043
2044define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
2045; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
2046; CHECK:         .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
2047; CHECK-NEXT:  # %bb.0:
2048; CHECK-NEXT:    local.get 0
2049; CHECK-NEXT:    v128.load64_zero 16
2050; CHECK-NEXT:    # fallthrough-return
2051  %q = ptrtoint <2 x i32>* %p to i32
2052  %r = add nuw i32 %q, 16
2053  %s = inttoptr i32 %r to <2 x i32>*
2054  %v = load <2 x i32>, <2 x i32>* %s
2055  ret <2 x i32> %v
2056}
2057
2058define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
2059; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
2060; CHECK:         .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
2061; CHECK-NEXT:  # %bb.0:
2062; CHECK-NEXT:    local.get 0
2063; CHECK-NEXT:    v128.load 16
2064; CHECK-NEXT:    # fallthrough-return
2065  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
2066  %v = load <2 x i64>, <2 x i64>* %s
2067  ret <2 x i64> %v
2068}
2069
2070define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
2071; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
2072; CHECK:         .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
2073; CHECK-NEXT:  # %bb.0:
2074; CHECK-NEXT:    local.get 0
2075; CHECK-NEXT:    v128.load64_splat 8
2076; CHECK-NEXT:    # fallthrough-return
2077  %s = getelementptr inbounds i64, i64* %p, i32 1
2078  %e = load i64, i64* %s
2079  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2080  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2081  ret <2 x i64> %v2
2082}
2083
2084define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
2085; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
2086; CHECK:         .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
2087; CHECK-NEXT:  # %bb.0:
2088; CHECK-NEXT:    local.get 0
2089; CHECK-NEXT:    i64x2.load32x2_s 8
2090; CHECK-NEXT:    # fallthrough-return
2091  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
2092  %v = load <2 x i32>, <2 x i32>* %s
2093  %v2 = sext <2 x i32> %v to <2 x i64>
2094  ret <2 x i64> %v2
2095}
2096
2097define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
2098; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
2099; CHECK:         .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
2100; CHECK-NEXT:  # %bb.0:
2101; CHECK-NEXT:    local.get 0
2102; CHECK-NEXT:    i64x2.load32x2_u 8
2103; CHECK-NEXT:    # fallthrough-return
2104  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
2105  %v = load <2 x i32>, <2 x i32>* %s
2106  %v2 = zext <2 x i32> %v to <2 x i64>
2107  ret <2 x i64> %v2
2108}
2109
2110define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
2111; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
2112; CHECK:         .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
2113; CHECK-NEXT:  # %bb.0:
2114; CHECK-NEXT:    local.get 0
2115; CHECK-NEXT:    v128.load64_zero 8
2116; CHECK-NEXT:    # fallthrough-return
2117  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
2118  %v = load <2 x i32>, <2 x i32>* %s
2119  ret <2 x i32> %v
2120}
2121
2122define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
2123; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
2124; CHECK:         .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2125; CHECK-NEXT:  # %bb.0:
2126; CHECK-NEXT:    local.get 0
2127; CHECK-NEXT:    i32.const -16
2128; CHECK-NEXT:    i32.add
2129; CHECK-NEXT:    v128.load 0
2130; CHECK-NEXT:    # fallthrough-return
2131  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
2132  %v = load <2 x i64>, <2 x i64>* %s
2133  ret <2 x i64> %v
2134}
2135
2136define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
2137; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
2138; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2139; CHECK-NEXT:  # %bb.0:
2140; CHECK-NEXT:    local.get 0
2141; CHECK-NEXT:    i32.const -8
2142; CHECK-NEXT:    i32.add
2143; CHECK-NEXT:    v128.load64_splat 0
2144; CHECK-NEXT:    # fallthrough-return
2145  %s = getelementptr inbounds i64, i64* %p, i32 -1
2146  %e = load i64, i64* %s
2147  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2148  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2149  ret <2 x i64> %v2
2150}
2151
2152define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
2153; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
2154; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2155; CHECK-NEXT:  # %bb.0:
2156; CHECK-NEXT:    local.get 0
2157; CHECK-NEXT:    i32.const -8
2158; CHECK-NEXT:    i32.add
2159; CHECK-NEXT:    i64x2.load32x2_s 0
2160; CHECK-NEXT:    # fallthrough-return
2161  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
2162  %v = load <2 x i32>, <2 x i32>* %s
2163  %v2 = sext <2 x i32> %v to <2 x i64>
2164  ret <2 x i64> %v2
2165}
2166
2167define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
2168; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
2169; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2170; CHECK-NEXT:  # %bb.0:
2171; CHECK-NEXT:    local.get 0
2172; CHECK-NEXT:    i32.const -8
2173; CHECK-NEXT:    i32.add
2174; CHECK-NEXT:    i64x2.load32x2_u 0
2175; CHECK-NEXT:    # fallthrough-return
2176  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
2177  %v = load <2 x i32>, <2 x i32>* %s
2178  %v2 = zext <2 x i32> %v to <2 x i64>
2179  ret <2 x i64> %v2
2180}
2181
2182define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
2183; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
2184; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2185; CHECK-NEXT:  # %bb.0:
2186; CHECK-NEXT:    local.get 0
2187; CHECK-NEXT:    i32.const -8
2188; CHECK-NEXT:    i32.add
2189; CHECK-NEXT:    v128.load64_zero 0
2190; CHECK-NEXT:    # fallthrough-return
2191  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
2192  %v = load <2 x i32>, <2 x i32>* %s
2193  ret <2 x i32> %v
2194}
2195
2196define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
2197; CHECK-LABEL: load_v2i64_with_unfolded_offset:
2198; CHECK:         .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
2199; CHECK-NEXT:  # %bb.0:
2200; CHECK-NEXT:    local.get 0
2201; CHECK-NEXT:    i32.const 16
2202; CHECK-NEXT:    i32.add
2203; CHECK-NEXT:    v128.load 0
2204; CHECK-NEXT:    # fallthrough-return
2205  %q = ptrtoint <2 x i64>* %p to i32
2206  %r = add nsw i32 %q, 16
2207  %s = inttoptr i32 %r to <2 x i64>*
2208  %v = load <2 x i64>, <2 x i64>* %s
2209  ret <2 x i64> %v
2210}
2211
2212define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
2213; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
2214; CHECK:         .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
2215; CHECK-NEXT:  # %bb.0:
2216; CHECK-NEXT:    local.get 0
2217; CHECK-NEXT:    i32.const 16
2218; CHECK-NEXT:    i32.add
2219; CHECK-NEXT:    v128.load64_splat 0
2220; CHECK-NEXT:    # fallthrough-return
2221  %q = ptrtoint i64* %p to i32
2222  %r = add nsw i32 %q, 16
2223  %s = inttoptr i32 %r to i64*
2224  %e = load i64, i64* %s
2225  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2226  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2227  ret <2 x i64> %v2
2228}
2229
2230define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
2231; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
2232; CHECK:         .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
2233; CHECK-NEXT:  # %bb.0:
2234; CHECK-NEXT:    local.get 0
2235; CHECK-NEXT:    i32.const 16
2236; CHECK-NEXT:    i32.add
2237; CHECK-NEXT:    i64x2.load32x2_s 0
2238; CHECK-NEXT:    # fallthrough-return
2239  %q = ptrtoint <2 x i32>* %p to i32
2240  %r = add nsw i32 %q, 16
2241  %s = inttoptr i32 %r to <2 x i32>*
2242  %v = load <2 x i32>, <2 x i32>* %s
2243  %v2 = sext <2 x i32> %v to <2 x i64>
2244  ret <2 x i64> %v2
2245}
2246
2247define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
2248; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
2249; CHECK:         .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
2250; CHECK-NEXT:  # %bb.0:
2251; CHECK-NEXT:    local.get 0
2252; CHECK-NEXT:    i32.const 16
2253; CHECK-NEXT:    i32.add
2254; CHECK-NEXT:    i64x2.load32x2_u 0
2255; CHECK-NEXT:    # fallthrough-return
2256  %q = ptrtoint <2 x i32>* %p to i32
2257  %r = add nsw i32 %q, 16
2258  %s = inttoptr i32 %r to <2 x i32>*
2259  %v = load <2 x i32>, <2 x i32>* %s
2260  %v2 = zext <2 x i32> %v to <2 x i64>
2261  ret <2 x i64> %v2
2262}
2263
2264define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
2265; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
2266; CHECK:         .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
2267; CHECK-NEXT:  # %bb.0:
2268; CHECK-NEXT:    local.get 0
2269; CHECK-NEXT:    i32.const 16
2270; CHECK-NEXT:    i32.add
2271; CHECK-NEXT:    v128.load64_zero 0
2272; CHECK-NEXT:    # fallthrough-return
2273  %q = ptrtoint <2 x i32>* %p to i32
2274  %r = add nsw i32 %q, 16
2275  %s = inttoptr i32 %r to <2 x i32>*
2276  %v = load <2 x i32>, <2 x i32>* %s
2277  ret <2 x i32> %v
2278}
2279
2280define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
2281; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
2282; CHECK:         .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2283; CHECK-NEXT:  # %bb.0:
2284; CHECK-NEXT:    local.get 0
2285; CHECK-NEXT:    i32.const 16
2286; CHECK-NEXT:    i32.add
2287; CHECK-NEXT:    v128.load 0
2288; CHECK-NEXT:    # fallthrough-return
2289  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
2290  %v = load <2 x i64>, <2 x i64>* %s
2291  ret <2 x i64> %v
2292}
2293
2294define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
2295; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
2296; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2297; CHECK-NEXT:  # %bb.0:
2298; CHECK-NEXT:    local.get 0
2299; CHECK-NEXT:    i32.const 8
2300; CHECK-NEXT:    i32.add
2301; CHECK-NEXT:    v128.load64_splat 0
2302; CHECK-NEXT:    # fallthrough-return
2303  %s = getelementptr i64, i64* %p, i32 1
2304  %e = load i64, i64* %s
2305  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2306  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2307  ret <2 x i64> %v2
2308}
2309
2310define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
2311; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
2312; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2313; CHECK-NEXT:  # %bb.0:
2314; CHECK-NEXT:    local.get 0
2315; CHECK-NEXT:    i32.const 8
2316; CHECK-NEXT:    i32.add
2317; CHECK-NEXT:    i64x2.load32x2_s 0
2318; CHECK-NEXT:    # fallthrough-return
2319  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
2320  %v = load <2 x i32>, <2 x i32>* %s
2321  %v2 = sext <2 x i32> %v to <2 x i64>
2322  ret <2 x i64> %v2
2323}
2324
2325define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
2326; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
2327; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2328; CHECK-NEXT:  # %bb.0:
2329; CHECK-NEXT:    local.get 0
2330; CHECK-NEXT:    i32.const 8
2331; CHECK-NEXT:    i32.add
2332; CHECK-NEXT:    i64x2.load32x2_u 0
2333; CHECK-NEXT:    # fallthrough-return
2334  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
2335  %v = load <2 x i32>, <2 x i32>* %s
2336  %v2 = zext <2 x i32> %v to <2 x i64>
2337  ret <2 x i64> %v2
2338}
2339
2340define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
2341; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
2342; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2343; CHECK-NEXT:  # %bb.0:
2344; CHECK-NEXT:    local.get 0
2345; CHECK-NEXT:    i32.const 8
2346; CHECK-NEXT:    i32.add
2347; CHECK-NEXT:    v128.load64_zero 0
2348; CHECK-NEXT:    # fallthrough-return
2349  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
2350  %v = load <2 x i32>, <2 x i32>* %s
2351  ret <2 x i32> %v
2352}
2353
2354define <2 x i64> @load_v2i64_from_numeric_address() {
2355; CHECK-LABEL: load_v2i64_from_numeric_address:
2356; CHECK:         .functype load_v2i64_from_numeric_address () -> (v128)
2357; CHECK-NEXT:  # %bb.0:
2358; CHECK-NEXT:    i32.const 0
2359; CHECK-NEXT:    v128.load 32
2360; CHECK-NEXT:    # fallthrough-return
2361  %s = inttoptr i32 32 to <2 x i64>*
2362  %v = load <2 x i64>, <2 x i64>* %s
2363  ret <2 x i64> %v
2364}
2365
2366define <2 x i64> @load_splat_v2i64_from_numeric_address() {
2367; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
2368; CHECK:         .functype load_splat_v2i64_from_numeric_address () -> (v128)
2369; CHECK-NEXT:  # %bb.0:
2370; CHECK-NEXT:    i32.const 0
2371; CHECK-NEXT:    v128.load64_splat 32
2372; CHECK-NEXT:    # fallthrough-return
2373  %s = inttoptr i32 32 to i64*
2374  %e = load i64, i64* %s
2375  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2376  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2377  ret <2 x i64> %v2
2378}
2379
2380define <2 x i64> @load_sext_v2i64_from_numeric_address() {
2381; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
2382; CHECK:         .functype load_sext_v2i64_from_numeric_address () -> (v128)
2383; CHECK-NEXT:  # %bb.0:
2384; CHECK-NEXT:    i32.const 0
2385; CHECK-NEXT:    i64x2.load32x2_s 32
2386; CHECK-NEXT:    # fallthrough-return
2387  %s = inttoptr i32 32 to <2 x i32>*
2388  %v = load <2 x i32>, <2 x i32>* %s
2389  %v2 = sext <2 x i32> %v to <2 x i64>
2390  ret <2 x i64> %v2
2391}
2392
2393define <2 x i64> @load_zext_v2i64_from_numeric_address() {
2394; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
2395; CHECK:         .functype load_zext_v2i64_from_numeric_address () -> (v128)
2396; CHECK-NEXT:  # %bb.0:
2397; CHECK-NEXT:    i32.const 0
2398; CHECK-NEXT:    i64x2.load32x2_u 32
2399; CHECK-NEXT:    # fallthrough-return
2400  %s = inttoptr i32 32 to <2 x i32>*
2401  %v = load <2 x i32>, <2 x i32>* %s
2402  %v2 = zext <2 x i32> %v to <2 x i64>
2403  ret <2 x i64> %v2
2404}
2405
2406define <2 x i32> @load_ext_v2i64_from_numeric_address() {
2407; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
2408; CHECK:         .functype load_ext_v2i64_from_numeric_address () -> (v128)
2409; CHECK-NEXT:  # %bb.0:
2410; CHECK-NEXT:    i32.const 0
2411; CHECK-NEXT:    v128.load64_zero 32
2412; CHECK-NEXT:    # fallthrough-return
2413  %s = inttoptr i32 32 to <2 x i32>*
2414  %v = load <2 x i32>, <2 x i32>* %s
2415  ret <2 x i32> %v
2416}
2417
2418@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
2419define <2 x i64> @load_v2i64_from_global_address() {
2420; CHECK-LABEL: load_v2i64_from_global_address:
2421; CHECK:         .functype load_v2i64_from_global_address () -> (v128)
2422; CHECK-NEXT:  # %bb.0:
2423; CHECK-NEXT:    i32.const 0
2424; CHECK-NEXT:    v128.load gv_v2i64
2425; CHECK-NEXT:    # fallthrough-return
2426  %v = load <2 x i64>, <2 x i64>* @gv_v2i64
2427  ret <2 x i64> %v
2428}
2429
2430@gv_i64 = global i64 42
2431define <2 x i64> @load_splat_v2i64_from_global_address() {
2432; CHECK-LABEL: load_splat_v2i64_from_global_address:
2433; CHECK:         .functype load_splat_v2i64_from_global_address () -> (v128)
2434; CHECK-NEXT:  # %bb.0:
2435; CHECK-NEXT:    i32.const 0
2436; CHECK-NEXT:    v128.load64_splat gv_i64
2437; CHECK-NEXT:    # fallthrough-return
2438  %e = load i64, i64* @gv_i64
2439  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2440  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2441  ret <2 x i64> %v2
2442}
2443
2444@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
2445define <2 x i64> @load_sext_v2i64_from_global_address() {
2446; CHECK-LABEL: load_sext_v2i64_from_global_address:
2447; CHECK:         .functype load_sext_v2i64_from_global_address () -> (v128)
2448; CHECK-NEXT:  # %bb.0:
2449; CHECK-NEXT:    i32.const 0
2450; CHECK-NEXT:    i64x2.load32x2_s gv_v2i32
2451; CHECK-NEXT:    # fallthrough-return
2452  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2453  %v2 = sext <2 x i32> %v to <2 x i64>
2454  ret <2 x i64> %v2
2455}
2456
2457define <2 x i64> @load_zext_v2i64_from_global_address() {
2458; CHECK-LABEL: load_zext_v2i64_from_global_address:
2459; CHECK:         .functype load_zext_v2i64_from_global_address () -> (v128)
2460; CHECK-NEXT:  # %bb.0:
2461; CHECK-NEXT:    i32.const 0
2462; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
2463; CHECK-NEXT:    # fallthrough-return
2464  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2465  %v2 = zext <2 x i32> %v to <2 x i64>
2466  ret <2 x i64> %v2
2467}
2468
2469define <2 x i32> @load_ext_v2i64_from_global_address() {
2470; CHECK-LABEL: load_ext_v2i64_from_global_address:
2471; CHECK:         .functype load_ext_v2i64_from_global_address () -> (v128)
2472; CHECK-NEXT:  # %bb.0:
2473; CHECK-NEXT:    i32.const 0
2474; CHECK-NEXT:    v128.load64_zero gv_v2i32
2475; CHECK-NEXT:    # fallthrough-return
2476  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2477  ret <2 x i32> %v
2478}
2479
2480define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
2481; CHECK-LABEL: store_v2i64:
2482; CHECK:         .functype store_v2i64 (v128, i32) -> ()
2483; CHECK-NEXT:  # %bb.0:
2484; CHECK-NEXT:    local.get 1
2485; CHECK-NEXT:    local.get 0
2486; CHECK-NEXT:    v128.store 0
2487; CHECK-NEXT:    # fallthrough-return
2488  store <2 x i64> %v , <2 x i64>* %p
2489  ret void
2490}
2491
2492define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
2493; CHECK-LABEL: store_v2i64_with_folded_offset:
2494; CHECK:         .functype store_v2i64_with_folded_offset (v128, i32) -> ()
2495; CHECK-NEXT:  # %bb.0:
2496; CHECK-NEXT:    local.get 1
2497; CHECK-NEXT:    local.get 0
2498; CHECK-NEXT:    v128.store 16
2499; CHECK-NEXT:    # fallthrough-return
2500  %q = ptrtoint <2 x i64>* %p to i32
2501  %r = add nuw i32 %q, 16
2502  %s = inttoptr i32 %r to <2 x i64>*
2503  store <2 x i64> %v , <2 x i64>* %s
2504  ret void
2505}
2506
2507define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
2508; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
2509; CHECK:         .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
2510; CHECK-NEXT:  # %bb.0:
2511; CHECK-NEXT:    local.get 1
2512; CHECK-NEXT:    local.get 0
2513; CHECK-NEXT:    v128.store 16
2514; CHECK-NEXT:    # fallthrough-return
2515  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
2516  store <2 x i64> %v , <2 x i64>* %s
2517  ret void
2518}
2519
2520define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
2521; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
2522; CHECK:         .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
2523; CHECK-NEXT:  # %bb.0:
2524; CHECK-NEXT:    local.get 1
2525; CHECK-NEXT:    i32.const -16
2526; CHECK-NEXT:    i32.add
2527; CHECK-NEXT:    local.get 0
2528; CHECK-NEXT:    v128.store 0
2529; CHECK-NEXT:    # fallthrough-return
2530  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
2531  store <2 x i64> %v , <2 x i64>* %s
2532  ret void
2533}
2534
2535define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
2536; CHECK-LABEL: store_v2i64_with_unfolded_offset:
2537; CHECK:         .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
2538; CHECK-NEXT:  # %bb.0:
2539; CHECK-NEXT:    local.get 1
2540; CHECK-NEXT:    i32.const 16
2541; CHECK-NEXT:    i32.add
2542; CHECK-NEXT:    local.get 0
2543; CHECK-NEXT:    v128.store 0
2544; CHECK-NEXT:    # fallthrough-return
2545  %q = ptrtoint <2 x i64>* %p to i32
2546  %r = add nsw i32 %q, 16
2547  %s = inttoptr i32 %r to <2 x i64>*
2548  store <2 x i64> %v , <2 x i64>* %s
2549  ret void
2550}
2551
2552define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
2553; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
2554; CHECK:         .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
2555; CHECK-NEXT:  # %bb.0:
2556; CHECK-NEXT:    local.get 1
2557; CHECK-NEXT:    i32.const 16
2558; CHECK-NEXT:    i32.add
2559; CHECK-NEXT:    local.get 0
2560; CHECK-NEXT:    v128.store 0
2561; CHECK-NEXT:    # fallthrough-return
2562  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
2563  store <2 x i64> %v , <2 x i64>* %s
2564  ret void
2565}
2566
2567define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
2568; CHECK-LABEL: store_v2i64_to_numeric_address:
2569; CHECK:         .functype store_v2i64_to_numeric_address (v128) -> ()
2570; CHECK-NEXT:  # %bb.0:
2571; CHECK-NEXT:    i32.const 0
2572; CHECK-NEXT:    local.get 0
2573; CHECK-NEXT:    v128.store 32
2574; CHECK-NEXT:    # fallthrough-return
2575  %s = inttoptr i32 32 to <2 x i64>*
2576  store <2 x i64> %v , <2 x i64>* %s
2577  ret void
2578}
2579
2580define void @store_v2i64_to_global_address(<2 x i64> %v) {
2581; CHECK-LABEL: store_v2i64_to_global_address:
2582; CHECK:         .functype store_v2i64_to_global_address (v128) -> ()
2583; CHECK-NEXT:  # %bb.0:
2584; CHECK-NEXT:    i32.const 0
2585; CHECK-NEXT:    local.get 0
2586; CHECK-NEXT:    v128.store gv_v2i64
2587; CHECK-NEXT:    # fallthrough-return
2588  store <2 x i64> %v , <2 x i64>* @gv_v2i64
2589  ret void
2590}
2591
2592; ==============================================================================
2593; 4 x float
2594; ==============================================================================
2595define <4 x float> @load_v4f32(<4 x float>* %p) {
2596; CHECK-LABEL: load_v4f32:
2597; CHECK:         .functype load_v4f32 (i32) -> (v128)
2598; CHECK-NEXT:  # %bb.0:
2599; CHECK-NEXT:    local.get 0
2600; CHECK-NEXT:    v128.load 0
2601; CHECK-NEXT:    # fallthrough-return
2602  %v = load <4 x float>, <4 x float>* %p
2603  ret <4 x float> %v
2604}
2605
2606define <4 x float> @load_splat_v4f32(float* %p) {
2607; CHECK-LABEL: load_splat_v4f32:
2608; CHECK:         .functype load_splat_v4f32 (i32) -> (v128)
2609; CHECK-NEXT:  # %bb.0:
2610; CHECK-NEXT:    local.get 0
2611; CHECK-NEXT:    v128.load32_splat 0
2612; CHECK-NEXT:    # fallthrough-return
2613  %e = load float, float* %p
2614  %v1 = insertelement <4 x float> undef, float %e, i32 0
2615  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2616  ret <4 x float> %v2
2617}
2618
2619define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
2620; CHECK-LABEL: load_v4f32_with_folded_offset:
2621; CHECK:         .functype load_v4f32_with_folded_offset (i32) -> (v128)
2622; CHECK-NEXT:  # %bb.0:
2623; CHECK-NEXT:    local.get 0
2624; CHECK-NEXT:    v128.load 16
2625; CHECK-NEXT:    # fallthrough-return
2626  %q = ptrtoint <4 x float>* %p to i32
2627  %r = add nuw i32 %q, 16
2628  %s = inttoptr i32 %r to <4 x float>*
2629  %v = load <4 x float>, <4 x float>* %s
2630  ret <4 x float> %v
2631}
2632
2633define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
2634; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
2635; CHECK:         .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
2636; CHECK-NEXT:  # %bb.0:
2637; CHECK-NEXT:    local.get 0
2638; CHECK-NEXT:    v128.load32_splat 16
2639; CHECK-NEXT:    # fallthrough-return
2640  %q = ptrtoint float* %p to i32
2641  %r = add nuw i32 %q, 16
2642  %s = inttoptr i32 %r to float*
2643  %e = load float, float* %s
2644  %v1 = insertelement <4 x float> undef, float %e, i32 0
2645  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2646  ret <4 x float> %v2
2647}
2648
2649define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
2650; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
2651; CHECK:         .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
2652; CHECK-NEXT:  # %bb.0:
2653; CHECK-NEXT:    local.get 0
2654; CHECK-NEXT:    v128.load 16
2655; CHECK-NEXT:    # fallthrough-return
2656  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
2657  %v = load <4 x float>, <4 x float>* %s
2658  ret <4 x float> %v
2659}
2660
2661define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
2662; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
2663; CHECK:         .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
2664; CHECK-NEXT:  # %bb.0:
2665; CHECK-NEXT:    local.get 0
2666; CHECK-NEXT:    v128.load32_splat 4
2667; CHECK-NEXT:    # fallthrough-return
2668  %s = getelementptr inbounds float, float* %p, i32 1
2669  %e = load float, float* %s
2670  %v1 = insertelement <4 x float> undef, float %e, i32 0
2671  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2672  ret <4 x float> %v2
2673}
2674
2675define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
2676; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
2677; CHECK:         .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
2678; CHECK-NEXT:  # %bb.0:
2679; CHECK-NEXT:    local.get 0
2680; CHECK-NEXT:    i32.const -16
2681; CHECK-NEXT:    i32.add
2682; CHECK-NEXT:    v128.load 0
2683; CHECK-NEXT:    # fallthrough-return
2684  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
2685  %v = load <4 x float>, <4 x float>* %s
2686  ret <4 x float> %v
2687}
2688
2689define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
2690; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
2691; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
2692; CHECK-NEXT:  # %bb.0:
2693; CHECK-NEXT:    local.get 0
2694; CHECK-NEXT:    i32.const -4
2695; CHECK-NEXT:    i32.add
2696; CHECK-NEXT:    v128.load32_splat 0
2697; CHECK-NEXT:    # fallthrough-return
2698  %s = getelementptr inbounds float, float* %p, i32 -1
2699  %e = load float, float* %s
2700  %v1 = insertelement <4 x float> undef, float %e, i32 0
2701  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2702  ret <4 x float> %v2
2703}
2704
2705define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
2706; CHECK-LABEL: load_v4f32_with_unfolded_offset:
2707; CHECK:         .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
2708; CHECK-NEXT:  # %bb.0:
2709; CHECK-NEXT:    local.get 0
2710; CHECK-NEXT:    i32.const 16
2711; CHECK-NEXT:    i32.add
2712; CHECK-NEXT:    v128.load 0
2713; CHECK-NEXT:    # fallthrough-return
2714  %q = ptrtoint <4 x float>* %p to i32
2715  %r = add nsw i32 %q, 16
2716  %s = inttoptr i32 %r to <4 x float>*
2717  %v = load <4 x float>, <4 x float>* %s
2718  ret <4 x float> %v
2719}
2720
2721define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
2722; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
2723; CHECK:         .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
2724; CHECK-NEXT:  # %bb.0:
2725; CHECK-NEXT:    local.get 0
2726; CHECK-NEXT:    i32.const 16
2727; CHECK-NEXT:    i32.add
2728; CHECK-NEXT:    v128.load32_splat 0
2729; CHECK-NEXT:    # fallthrough-return
2730  %q = ptrtoint float* %p to i32
2731  %r = add nsw i32 %q, 16
2732  %s = inttoptr i32 %r to float*
2733  %e = load float, float* %s
2734  %v1 = insertelement <4 x float> undef, float %e, i32 0
2735  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2736  ret <4 x float> %v2
2737}
2738
2739define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
2740; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
2741; CHECK:         .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
2742; CHECK-NEXT:  # %bb.0:
2743; CHECK-NEXT:    local.get 0
2744; CHECK-NEXT:    i32.const 16
2745; CHECK-NEXT:    i32.add
2746; CHECK-NEXT:    v128.load 0
2747; CHECK-NEXT:    # fallthrough-return
2748  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
2749  %v = load <4 x float>, <4 x float>* %s
2750  ret <4 x float> %v
2751}
2752
2753define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
2754; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
2755; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
2756; CHECK-NEXT:  # %bb.0:
2757; CHECK-NEXT:    local.get 0
2758; CHECK-NEXT:    i32.const 4
2759; CHECK-NEXT:    i32.add
2760; CHECK-NEXT:    v128.load32_splat 0
2761; CHECK-NEXT:    # fallthrough-return
2762  %s = getelementptr float, float* %p, i32 1
2763  %e = load float, float* %s
2764  %v1 = insertelement <4 x float> undef, float %e, i32 0
2765  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2766  ret <4 x float> %v2
2767}
2768
2769define <4 x float> @load_v4f32_from_numeric_address() {
2770; CHECK-LABEL: load_v4f32_from_numeric_address:
2771; CHECK:         .functype load_v4f32_from_numeric_address () -> (v128)
2772; CHECK-NEXT:  # %bb.0:
2773; CHECK-NEXT:    i32.const 0
2774; CHECK-NEXT:    v128.load 32
2775; CHECK-NEXT:    # fallthrough-return
2776  %s = inttoptr i32 32 to <4 x float>*
2777  %v = load <4 x float>, <4 x float>* %s
2778  ret <4 x float> %v
2779}
2780
2781define <4 x float> @load_splat_v4f32_from_numeric_address() {
2782; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
2783; CHECK:         .functype load_splat_v4f32_from_numeric_address () -> (v128)
2784; CHECK-NEXT:  # %bb.0:
2785; CHECK-NEXT:    i32.const 0
2786; CHECK-NEXT:    v128.load32_splat 32
2787; CHECK-NEXT:    # fallthrough-return
2788  %s = inttoptr i32 32 to float*
2789  %e = load float, float* %s
2790  %v1 = insertelement <4 x float> undef, float %e, i32 0
2791  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2792  ret <4 x float> %v2
2793}
2794
2795@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
2796define <4 x float> @load_v4f32_from_global_address() {
2797; CHECK-LABEL: load_v4f32_from_global_address:
2798; CHECK:         .functype load_v4f32_from_global_address () -> (v128)
2799; CHECK-NEXT:  # %bb.0:
2800; CHECK-NEXT:    i32.const 0
2801; CHECK-NEXT:    v128.load gv_v4f32
2802; CHECK-NEXT:    # fallthrough-return
2803  %v = load <4 x float>, <4 x float>* @gv_v4f32
2804  ret <4 x float> %v
2805}
2806
2807@gv_f32 = global float 42.
2808define <4 x float> @load_splat_v4f32_from_global_address() {
2809; CHECK-LABEL: load_splat_v4f32_from_global_address:
2810; CHECK:         .functype load_splat_v4f32_from_global_address () -> (v128)
2811; CHECK-NEXT:  # %bb.0:
2812; CHECK-NEXT:    i32.const 0
2813; CHECK-NEXT:    v128.load32_splat gv_f32
2814; CHECK-NEXT:    # fallthrough-return
2815  %e = load float, float* @gv_f32
2816  %v1 = insertelement <4 x float> undef, float %e, i32 0
2817  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2818  ret <4 x float> %v2
2819}
2820
2821define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
2822; CHECK-LABEL: store_v4f32:
2823; CHECK:         .functype store_v4f32 (v128, i32) -> ()
2824; CHECK-NEXT:  # %bb.0:
2825; CHECK-NEXT:    local.get 1
2826; CHECK-NEXT:    local.get 0
2827; CHECK-NEXT:    v128.store 0
2828; CHECK-NEXT:    # fallthrough-return
2829  store <4 x float> %v , <4 x float>* %p
2830  ret void
2831}
2832
2833define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
2834; CHECK-LABEL: store_v4f32_with_folded_offset:
2835; CHECK:         .functype store_v4f32_with_folded_offset (v128, i32) -> ()
2836; CHECK-NEXT:  # %bb.0:
2837; CHECK-NEXT:    local.get 1
2838; CHECK-NEXT:    local.get 0
2839; CHECK-NEXT:    v128.store 16
2840; CHECK-NEXT:    # fallthrough-return
2841  %q = ptrtoint <4 x float>* %p to i32
2842  %r = add nuw i32 %q, 16
2843  %s = inttoptr i32 %r to <4 x float>*
2844  store <4 x float> %v , <4 x float>* %s
2845  ret void
2846}
2847
2848define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
2849; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
2850; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
2851; CHECK-NEXT:  # %bb.0:
2852; CHECK-NEXT:    local.get 1
2853; CHECK-NEXT:    local.get 0
2854; CHECK-NEXT:    v128.store 16
2855; CHECK-NEXT:    # fallthrough-return
2856  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
2857  store <4 x float> %v , <4 x float>* %s
2858  ret void
2859}
2860
2861define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
2862; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
2863; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
2864; CHECK-NEXT:  # %bb.0:
2865; CHECK-NEXT:    local.get 1
2866; CHECK-NEXT:    i32.const -16
2867; CHECK-NEXT:    i32.add
2868; CHECK-NEXT:    local.get 0
2869; CHECK-NEXT:    v128.store 0
2870; CHECK-NEXT:    # fallthrough-return
2871  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
2872  store <4 x float> %v , <4 x float>* %s
2873  ret void
2874}
2875
2876define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
2877; CHECK-LABEL: store_v4f32_with_unfolded_offset:
2878; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
2879; CHECK-NEXT:  # %bb.0:
2880; CHECK-NEXT:    local.get 1
2881; CHECK-NEXT:    i32.const 16
2882; CHECK-NEXT:    i32.add
2883; CHECK-NEXT:    local.get 0
2884; CHECK-NEXT:    v128.store 0
2885; CHECK-NEXT:    # fallthrough-return
2886  %q = ptrtoint <4 x float>* %p to i32
2887  %r = add nsw i32 %q, 16
2888  %s = inttoptr i32 %r to <4 x float>*
2889  store <4 x float> %v , <4 x float>* %s
2890  ret void
2891}
2892
2893define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
2894; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
2895; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
2896; CHECK-NEXT:  # %bb.0:
2897; CHECK-NEXT:    local.get 1
2898; CHECK-NEXT:    i32.const 16
2899; CHECK-NEXT:    i32.add
2900; CHECK-NEXT:    local.get 0
2901; CHECK-NEXT:    v128.store 0
2902; CHECK-NEXT:    # fallthrough-return
2903  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
2904  store <4 x float> %v , <4 x float>* %s
2905  ret void
2906}
2907
2908define void @store_v4f32_to_numeric_address(<4 x float> %v) {
2909; CHECK-LABEL: store_v4f32_to_numeric_address:
2910; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
2911; CHECK-NEXT:  # %bb.0:
2912; CHECK-NEXT:    i32.const 0
2913; CHECK-NEXT:    local.get 0
2914; CHECK-NEXT:    v128.store 32
2915; CHECK-NEXT:    # fallthrough-return
2916  %s = inttoptr i32 32 to <4 x float>*
2917  store <4 x float> %v , <4 x float>* %s
2918  ret void
2919}
2920
2921define void @store_v4f32_to_global_address(<4 x float> %v) {
2922; CHECK-LABEL: store_v4f32_to_global_address:
2923; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
2924; CHECK-NEXT:  # %bb.0:
2925; CHECK-NEXT:    i32.const 0
2926; CHECK-NEXT:    local.get 0
2927; CHECK-NEXT:    v128.store gv_v4f32
2928; CHECK-NEXT:    # fallthrough-return
2929  store <4 x float> %v , <4 x float>* @gv_v4f32
2930  ret void
2931}
2932
2933; ==============================================================================
2934; 2 x double
2935; ==============================================================================
2936define <2 x double> @load_v2f64(<2 x double>* %p) {
2937; CHECK-LABEL: load_v2f64:
2938; CHECK:         .functype load_v2f64 (i32) -> (v128)
2939; CHECK-NEXT:  # %bb.0:
2940; CHECK-NEXT:    local.get 0
2941; CHECK-NEXT:    v128.load 0
2942; CHECK-NEXT:    # fallthrough-return
2943  %v = load <2 x double>, <2 x double>* %p
2944  ret <2 x double> %v
2945}
2946
2947define <2 x double> @load_splat_v2f64(double* %p) {
2948; CHECK-LABEL: load_splat_v2f64:
2949; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
2950; CHECK-NEXT:  # %bb.0:
2951; CHECK-NEXT:    local.get 0
2952; CHECK-NEXT:    v128.load64_splat 0
2953; CHECK-NEXT:    # fallthrough-return
2954  %e = load double, double* %p
2955  %v1 = insertelement <2 x double> undef, double %e, i32 0
2956  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2957  ret <2 x double> %v2
2958}
2959
2960define <2 x double> @load_promote_v2f64(<2 x float>* %p) {
2961; CHECK-LABEL: load_promote_v2f64:
2962; CHECK:         .functype load_promote_v2f64 (i32) -> (v128)
2963; CHECK-NEXT:  # %bb.0:
2964; CHECK-NEXT:    local.get 0
2965; CHECK-NEXT:    v128.load64_zero 0
2966; CHECK-NEXT:    f64x2.promote_low_f32x4
2967; CHECK-NEXT:    # fallthrough-return
2968  %e = load <2 x float>, <2 x float>* %p
2969  %v = fpext <2 x float> %e to <2 x double>
2970  ret <2 x double> %v
2971}
2972
2973define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
2974; CHECK-LABEL: load_v2f64_with_folded_offset:
2975; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
2976; CHECK-NEXT:  # %bb.0:
2977; CHECK-NEXT:    local.get 0
2978; CHECK-NEXT:    v128.load 16
2979; CHECK-NEXT:    # fallthrough-return
2980  %q = ptrtoint <2 x double>* %p to i32
2981  %r = add nuw i32 %q, 16
2982  %s = inttoptr i32 %r to <2 x double>*
2983  %v = load <2 x double>, <2 x double>* %s
2984  ret <2 x double> %v
2985}
2986
2987define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
2988; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
2989; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
2990; CHECK-NEXT:  # %bb.0:
2991; CHECK-NEXT:    local.get 0
2992; CHECK-NEXT:    v128.load64_splat 16
2993; CHECK-NEXT:    # fallthrough-return
2994  %q = ptrtoint double* %p to i32
2995  %r = add nuw i32 %q, 16
2996  %s = inttoptr i32 %r to double*
2997  %e = load double, double* %s
2998  %v1 = insertelement <2 x double> undef, double %e, i32 0
2999  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3000  ret <2 x double> %v2
3001}
3002
3003define <2 x double> @load_promote_v2f64_with_folded_offset(<2 x float>* %p) {
3004; CHECK-LABEL: load_promote_v2f64_with_folded_offset:
3005; CHECK:         .functype load_promote_v2f64_with_folded_offset (i32) -> (v128)
3006; CHECK-NEXT:  # %bb.0:
3007; CHECK-NEXT:    local.get 0
3008; CHECK-NEXT:    i32.const 16
3009; CHECK-NEXT:    i32.add
3010; CHECK-NEXT:    v128.load64_zero 0
3011; CHECK-NEXT:    f64x2.promote_low_f32x4
3012; CHECK-NEXT:    # fallthrough-return
3013  %q = ptrtoint <2 x float>* %p to i32
3014  %r = add nuw i32 %q, 16
3015  %s = inttoptr i32 %r to <2 x float>*
3016  %e = load <2 x float>, <2 x float>* %s
3017  %v = fpext <2 x float> %e to <2 x double>
3018  ret <2 x double> %v
3019}
3020
3021define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
3022; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
3023; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
3024; CHECK-NEXT:  # %bb.0:
3025; CHECK-NEXT:    local.get 0
3026; CHECK-NEXT:    v128.load 16
3027; CHECK-NEXT:    # fallthrough-return
3028  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
3029  %v = load <2 x double>, <2 x double>* %s
3030  ret <2 x double> %v
3031}
3032
3033define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
3034; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
3035; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
3036; CHECK-NEXT:  # %bb.0:
3037; CHECK-NEXT:    local.get 0
3038; CHECK-NEXT:    v128.load64_splat 8
3039; CHECK-NEXT:    # fallthrough-return
3040  %s = getelementptr inbounds double, double* %p, i32 1
3041  %e = load double, double* %s
3042  %v1 = insertelement <2 x double> undef, double %e, i32 0
3043  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3044  ret <2 x double> %v2
3045}
3046
3047define <2 x double> @load_promote_v2f64_with_folded_gep_offset(<2 x float>* %p) {
3048; CHECK-LABEL: load_promote_v2f64_with_folded_gep_offset:
3049; CHECK:         .functype load_promote_v2f64_with_folded_gep_offset (i32) -> (v128)
3050; CHECK-NEXT:  # %bb.0:
3051; CHECK-NEXT:    local.get 0
3052; CHECK-NEXT:    i32.const 8
3053; CHECK-NEXT:    i32.add
3054; CHECK-NEXT:    v128.load64_zero 0
3055; CHECK-NEXT:    f64x2.promote_low_f32x4
3056; CHECK-NEXT:    # fallthrough-return
3057  %s = getelementptr inbounds <2 x float>, <2 x float>* %p, i32 1
3058  %e = load <2 x float>, <2 x float>* %s
3059  %v = fpext <2 x float> %e to <2 x double>
3060  ret <2 x double> %v
3061}
3062
3063define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
3064; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
3065; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
3066; CHECK-NEXT:  # %bb.0:
3067; CHECK-NEXT:    local.get 0
3068; CHECK-NEXT:    i32.const -16
3069; CHECK-NEXT:    i32.add
3070; CHECK-NEXT:    v128.load 0
3071; CHECK-NEXT:    # fallthrough-return
3072  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
3073  %v = load <2 x double>, <2 x double>* %s
3074  ret <2 x double> %v
3075}
3076
3077define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
3078; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
3079; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
3080; CHECK-NEXT:  # %bb.0:
3081; CHECK-NEXT:    local.get 0
3082; CHECK-NEXT:    i32.const -8
3083; CHECK-NEXT:    i32.add
3084; CHECK-NEXT:    v128.load64_splat 0
3085; CHECK-NEXT:    # fallthrough-return
3086  %s = getelementptr inbounds double, double* %p, i32 -1
3087  %e = load double, double* %s
3088  %v1 = insertelement <2 x double> undef, double %e, i32 0
3089  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3090  ret <2 x double> %v2
3091}
3092
3093define <2 x double> @load_promote_v2f64_with_unfolded_gep_negative_offset(<2 x float>* %p) {
3094; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_negative_offset:
3095; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
3096; CHECK-NEXT:  # %bb.0:
3097; CHECK-NEXT:    local.get 0
3098; CHECK-NEXT:    i32.const -8
3099; CHECK-NEXT:    i32.add
3100; CHECK-NEXT:    v128.load64_zero 0
3101; CHECK-NEXT:    f64x2.promote_low_f32x4
3102; CHECK-NEXT:    # fallthrough-return
3103  %s = getelementptr inbounds <2 x float>, <2 x float>* %p, i32 -1
3104  %e = load <2 x float>, <2 x float>* %s
3105  %v = fpext <2 x float> %e to <2 x double>
3106  ret <2 x double> %v
3107}
3108
3109define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
3110; CHECK-LABEL: load_v2f64_with_unfolded_offset:
3111; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
3112; CHECK-NEXT:  # %bb.0:
3113; CHECK-NEXT:    local.get 0
3114; CHECK-NEXT:    i32.const 16
3115; CHECK-NEXT:    i32.add
3116; CHECK-NEXT:    v128.load 0
3117; CHECK-NEXT:    # fallthrough-return
3118  %q = ptrtoint <2 x double>* %p to i32
3119  %r = add nsw i32 %q, 16
3120  %s = inttoptr i32 %r to <2 x double>*
3121  %v = load <2 x double>, <2 x double>* %s
3122  ret <2 x double> %v
3123}
3124
3125define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
3126; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
3127; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
3128; CHECK-NEXT:  # %bb.0:
3129; CHECK-NEXT:    local.get 0
3130; CHECK-NEXT:    i32.const 16
3131; CHECK-NEXT:    i32.add
3132; CHECK-NEXT:    v128.load64_splat 0
3133; CHECK-NEXT:    # fallthrough-return
3134  %q = ptrtoint double* %p to i32
3135  %r = add nsw i32 %q, 16
3136  %s = inttoptr i32 %r to double*
3137  %e = load double, double* %s
3138  %v1 = insertelement <2 x double> undef, double %e, i32 0
3139  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3140  ret <2 x double> %v2
3141}
3142
3143define <2 x double> @load_promote_v2f64_with_unfolded_offset(<2 x float>* %p) {
3144; CHECK-LABEL: load_promote_v2f64_with_unfolded_offset:
3145; CHECK:         .functype load_promote_v2f64_with_unfolded_offset (i32) -> (v128)
3146; CHECK-NEXT:  # %bb.0:
3147; CHECK-NEXT:    local.get 0
3148; CHECK-NEXT:    i32.const 16
3149; CHECK-NEXT:    i32.add
3150; CHECK-NEXT:    v128.load64_zero 0
3151; CHECK-NEXT:    f64x2.promote_low_f32x4
3152; CHECK-NEXT:    # fallthrough-return
3153  %q = ptrtoint <2 x float>* %p to i32
3154  %r = add nsw i32 %q, 16
3155  %s = inttoptr i32 %r to <2 x float>*
3156  %e = load <2 x float>, <2 x float>* %s
3157  %v = fpext <2 x float> %e to <2 x double>
3158  ret <2 x double> %v
3159}
3160
3161define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
3162; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
3163; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
3164; CHECK-NEXT:  # %bb.0:
3165; CHECK-NEXT:    local.get 0
3166; CHECK-NEXT:    i32.const 16
3167; CHECK-NEXT:    i32.add
3168; CHECK-NEXT:    v128.load 0
3169; CHECK-NEXT:    # fallthrough-return
3170  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
3171  %v = load <2 x double>, <2 x double>* %s
3172  ret <2 x double> %v
3173}
3174
3175define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
3176; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
3177; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
3178; CHECK-NEXT:  # %bb.0:
3179; CHECK-NEXT:    local.get 0
3180; CHECK-NEXT:    i32.const 8
3181; CHECK-NEXT:    i32.add
3182; CHECK-NEXT:    v128.load64_splat 0
3183; CHECK-NEXT:    # fallthrough-return
3184  %s = getelementptr double, double* %p, i32 1
3185  %e = load double, double* %s
3186  %v1 = insertelement <2 x double> undef, double %e, i32 0
3187  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3188  ret <2 x double> %v2
3189}
3190
3191define <2 x double> @load_promote_v2f64_with_unfolded_gep_offset(<2 x float>* %p) {
3192; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_offset:
3193; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_offset (i32) -> (v128)
3194; CHECK-NEXT:  # %bb.0:
3195; CHECK-NEXT:    local.get 0
3196; CHECK-NEXT:    i32.const 8
3197; CHECK-NEXT:    i32.add
3198; CHECK-NEXT:    v128.load64_zero 0
3199; CHECK-NEXT:    f64x2.promote_low_f32x4
3200; CHECK-NEXT:    # fallthrough-return
3201  %s = getelementptr <2 x float>, <2 x float>* %p, i32 1
3202  %e = load <2 x float>, <2 x float>* %s
3203  %v = fpext <2 x float> %e to <2 x double>
3204  ret <2 x double> %v
3205}
3206
3207define <2 x double> @load_v2f64_from_numeric_address() {
3208; CHECK-LABEL: load_v2f64_from_numeric_address:
3209; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
3210; CHECK-NEXT:  # %bb.0:
3211; CHECK-NEXT:    i32.const 0
3212; CHECK-NEXT:    v128.load 32
3213; CHECK-NEXT:    # fallthrough-return
3214  %s = inttoptr i32 32 to <2 x double>*
3215  %v = load <2 x double>, <2 x double>* %s
3216  ret <2 x double> %v
3217}
3218
3219define <2 x double> @load_splat_v2f64_from_numeric_address() {
3220; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
3221; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
3222; CHECK-NEXT:  # %bb.0:
3223; CHECK-NEXT:    i32.const 0
3224; CHECK-NEXT:    v128.load64_splat 32
3225; CHECK-NEXT:    # fallthrough-return
3226  %s = inttoptr i32 32 to double*
3227  %e = load double, double* %s
3228  %v1 = insertelement <2 x double> undef, double %e, i32 0
3229  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3230  ret <2 x double> %v2
3231}
3232
3233define <2 x double> @load_promote_v2f64_from_numeric_address() {
3234; CHECK-LABEL: load_promote_v2f64_from_numeric_address:
3235; CHECK:         .functype load_promote_v2f64_from_numeric_address () -> (v128)
3236; CHECK-NEXT:  # %bb.0:
3237; CHECK-NEXT:    i32.const 32
3238; CHECK-NEXT:    v128.load64_zero 0
3239; CHECK-NEXT:    f64x2.promote_low_f32x4
3240; CHECK-NEXT:    # fallthrough-return
3241  %s = inttoptr i32 32 to <2 x float>*
3242  %e = load <2 x float>, <2 x float>* %s
3243  %v = fpext <2 x float> %e to <2 x double>
3244  ret <2 x double> %v
3245}
3246
3247@gv_v2f64 = global <2 x double> <double 42., double 42.>
3248define <2 x double> @load_v2f64_from_global_address() {
3249; CHECK-LABEL: load_v2f64_from_global_address:
3250; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
3251; CHECK-NEXT:  # %bb.0:
3252; CHECK-NEXT:    i32.const 0
3253; CHECK-NEXT:    v128.load gv_v2f64
3254; CHECK-NEXT:    # fallthrough-return
3255  %v = load <2 x double>, <2 x double>* @gv_v2f64
3256  ret <2 x double> %v
3257}
3258
3259@gv_f64 = global double 42.
3260define <2 x double> @load_splat_v2f64_from_global_address() {
3261; CHECK-LABEL: load_splat_v2f64_from_global_address:
3262; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
3263; CHECK-NEXT:  # %bb.0:
3264; CHECK-NEXT:    i32.const 0
3265; CHECK-NEXT:    v128.load64_splat gv_f64
3266; CHECK-NEXT:    # fallthrough-return
3267  %e = load double, double* @gv_f64
3268  %v1 = insertelement <2 x double> undef, double %e, i32 0
3269  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3270  ret <2 x double> %v2
3271}
3272
3273@gv_v2f32 = global <2 x float> <float 42., float 42.>
3274define <2 x double> @load_promote_v2f64_from_global_address() {
3275; CHECK-LABEL: load_promote_v2f64_from_global_address:
3276; CHECK:         .functype load_promote_v2f64_from_global_address () -> (v128)
3277; CHECK-NEXT:  # %bb.0:
3278; CHECK-NEXT:    i32.const gv_v2f32
3279; CHECK-NEXT:    v128.load64_zero 0
3280; CHECK-NEXT:    f64x2.promote_low_f32x4
3281; CHECK-NEXT:    # fallthrough-return
3282  %e = load <2 x float>, <2 x float>* @gv_v2f32
3283  %v = fpext <2 x float> %e to <2 x double>
3284  ret <2 x double> %v
3285}
3286
3287define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
3288; CHECK-LABEL: store_v2f64:
3289; CHECK:         .functype store_v2f64 (v128, i32) -> ()
3290; CHECK-NEXT:  # %bb.0:
3291; CHECK-NEXT:    local.get 1
3292; CHECK-NEXT:    local.get 0
3293; CHECK-NEXT:    v128.store 0
3294; CHECK-NEXT:    # fallthrough-return
3295  store <2 x double> %v , <2 x double>* %p
3296  ret void
3297}
3298
3299define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
3300; CHECK-LABEL: store_v2f64_with_folded_offset:
3301; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
3302; CHECK-NEXT:  # %bb.0:
3303; CHECK-NEXT:    local.get 1
3304; CHECK-NEXT:    local.get 0
3305; CHECK-NEXT:    v128.store 16
3306; CHECK-NEXT:    # fallthrough-return
3307  %q = ptrtoint <2 x double>* %p to i32
3308  %r = add nuw i32 %q, 16
3309  %s = inttoptr i32 %r to <2 x double>*
3310  store <2 x double> %v , <2 x double>* %s
3311  ret void
3312}
3313
3314define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
3315; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
3316; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
3317; CHECK-NEXT:  # %bb.0:
3318; CHECK-NEXT:    local.get 1
3319; CHECK-NEXT:    local.get 0
3320; CHECK-NEXT:    v128.store 16
3321; CHECK-NEXT:    # fallthrough-return
3322  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
3323  store <2 x double> %v , <2 x double>* %s
3324  ret void
3325}
3326
3327define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
3328; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
3329; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
3330; CHECK-NEXT:  # %bb.0:
3331; CHECK-NEXT:    local.get 1
3332; CHECK-NEXT:    i32.const -16
3333; CHECK-NEXT:    i32.add
3334; CHECK-NEXT:    local.get 0
3335; CHECK-NEXT:    v128.store 0
3336; CHECK-NEXT:    # fallthrough-return
3337  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
3338  store <2 x double> %v , <2 x double>* %s
3339  ret void
3340}
3341
3342define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
3343; CHECK-LABEL: store_v2f64_with_unfolded_offset:
3344; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
3345; CHECK-NEXT:  # %bb.0:
3346; CHECK-NEXT:    local.get 1
3347; CHECK-NEXT:    i32.const 16
3348; CHECK-NEXT:    i32.add
3349; CHECK-NEXT:    local.get 0
3350; CHECK-NEXT:    v128.store 0
3351; CHECK-NEXT:    # fallthrough-return
3352  %q = ptrtoint <2 x double>* %p to i32
3353  %r = add nsw i32 %q, 16
3354  %s = inttoptr i32 %r to <2 x double>*
3355  store <2 x double> %v , <2 x double>* %s
3356  ret void
3357}
3358
3359define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
3360; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
3361; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
3362; CHECK-NEXT:  # %bb.0:
3363; CHECK-NEXT:    local.get 1
3364; CHECK-NEXT:    i32.const 16
3365; CHECK-NEXT:    i32.add
3366; CHECK-NEXT:    local.get 0
3367; CHECK-NEXT:    v128.store 0
3368; CHECK-NEXT:    # fallthrough-return
3369  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
3370  store <2 x double> %v , <2 x double>* %s
3371  ret void
3372}
3373
3374define void @store_v2f64_to_numeric_address(<2 x double> %v) {
3375; CHECK-LABEL: store_v2f64_to_numeric_address:
3376; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
3377; CHECK-NEXT:  # %bb.0:
3378; CHECK-NEXT:    i32.const 0
3379; CHECK-NEXT:    local.get 0
3380; CHECK-NEXT:    v128.store 32
3381; CHECK-NEXT:    # fallthrough-return
3382  %s = inttoptr i32 32 to <2 x double>*
3383  store <2 x double> %v , <2 x double>* %s
3384  ret void
3385}
3386
3387define void @store_v2f64_to_global_address(<2 x double> %v) {
3388; CHECK-LABEL: store_v2f64_to_global_address:
3389; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
3390; CHECK-NEXT:  # %bb.0:
3391; CHECK-NEXT:    i32.const 0
3392; CHECK-NEXT:    local.get 0
3393; CHECK-NEXT:    v128.store gv_v2f64
3394; CHECK-NEXT:    # fallthrough-return
3395  store <2 x double> %v , <2 x double>* @gv_v2f64
3396  ret void
3397}
3398