1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
3
4; Test SIMD loads and stores
5
6target triple = "wasm32-unknown-unknown"
7
8; ==============================================================================
9; 16 x i8
10; ==============================================================================
; Plain v128 load: the pointer is used directly as the base address with a
; zero offset immediate.
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %v
}

; A scalar i8 load broadcast to every lane should select a single
; v128.load8_splat rather than a scalar load followed by a splat instruction.
define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
34
; An address computed with `add nuw` can be folded into the load's constant
; offset immediate: the nuw flag proves the unsigned address arithmetic does
; not wrap, which is required because wasm offset immediates are unsigned.
define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; Same folding should apply to the splatting load.
define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
64
; An `inbounds` GEP with a positive constant index likewise folds into the
; offset immediate (16 bytes for one <16 x i8> element).
define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; For the splat the element is i8, so the folded offset is 1 byte.
define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 1
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
90
; Negative offsets are never folded (the offset immediate is unsigned), so
; the address is materialized with an explicit i32.add of a negative constant.
define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
120
; `add nsw` alone (no nuw) does not rule out unsigned wraparound, so the
; constant cannot be folded into the offset immediate and stays as an
; explicit i32.add.
define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
154
; A GEP without `inbounds` may wrap, so its constant offset is likewise not
; folded into the offset immediate.
define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
184
; A constant absolute address is emitted as a zero base (i32.const 0) with
; the address folded into the offset immediate.
define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
210
; A global's address is folded as a symbolic offset immediate against a zero
; base, to be resolved at link time.
@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %v
}

@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat gv_i8
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
236
; Stores mirror the load tests: the address (local 1) is pushed first, then
; the value (local 0), then v128.store with the folded offset immediate.
define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* %p
  ret void
}

; `add nuw` offset folds into the store's offset immediate.
define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; An inbounds GEP with positive constant index also folds.
define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}
276
; Negative, nsw-only, and non-inbounds offsets are not folded for stores
; either; the address is computed with an explicit i32.add.
define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}
323
; Constant absolute and global addresses fold into the store's offset
; immediate against a zero base, matching the corresponding load tests.
define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* @gv_v16i8
  ret void
}
348
349; ==============================================================================
350; 8 x i16
351; ==============================================================================
; Basic <8 x i16> load.
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16:
; CHECK:         .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %v
}

; Scalar i16 broadcast should select v128.load16_splat.
define <8 x i16> @load_splat_v8i16(i16* %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; load <8 x i8> + sext selects the sign-extending widening load.
define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; load <8 x i8> + zext selects the zero-extending widening load.
define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; A bare <8 x i8> load (no extension in the IR) is also lowered via the
; zero-extending widening load, since the result lives in a v128.
define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %v
}
410
; `add nuw` offsets fold into the offset immediate for the plain, splatting,
; and widening (load8x8_s/_u) load forms alike.
define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
484
; Inbounds GEPs with positive constant indices fold; note the byte offset
; scales with the element size (16 for <8 x i16>, 2 for i16, 8 for <8 x i8>).
define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 2
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
548
; Negative offsets are never folded (offset immediates are unsigned); each
; form keeps an explicit i32.add of the negative byte offset.
define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
622
; `add nsw` without nuw does not rule out unsigned wraparound, so the offset
; is not folded for any of the load forms.
define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
706
; GEPs without `inbounds` may wrap, so their constant offsets are not folded
; for any of the load forms.
define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
780
; Constant absolute addresses become a zero base with the address folded
; into the offset immediate, for all load forms.
define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
844
; Loads from global addresses: the global's symbol is used directly as the
; offset immediate over an "i32.const 0" base.
@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %v
}

; Splat load of a scalar global: v128.load16_splat with the symbol as offset.
@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
870
; Extending loads from a global address: the symbol is used as the offset
; immediate of the load8x8 instructions.
@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Direct <8 x i8> load (no explicit ext) is widened via the _u extending load.
define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %v
}
906
907
; Basic v128 store through a pointer argument.
define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* %p
  ret void
}

; There is no narrowing v128 store: the <8 x i8> value is masked to bytes,
; narrowed with i8x16.narrow_i16x8_u, and its low 64 bits are stored via
; i64x2.extract_lane + i64.store.
define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16:
; CHECK:         .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v, <8 x i8>* %p
  ret void
}
936
; "add nuw" guarantees the address computation cannot wrap unsigned, so the
; +16 folds into the store's offset immediate.
define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; Narrowing-store lowering with the nuw offset folded into i64.store.
define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}
971
; An "inbounds" gep with a positive index folds into the offset immediate
; (16 bytes for <8 x i16>, 8 bytes for <8 x i8>).
define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}
1002
; Negative gep offsets are not folded (offset immediates are unsigned), so an
; explicit i32.add of the negative constant is emitted instead.
define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}
1037
; "add nsw" (without nuw) permits unsigned wraparound, so the +16 cannot be
; folded into the offset immediate and an explicit i32.add is emitted.
define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK:         .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}
1076
; A gep without "inbounds" may wrap, so its offset is not folded and an
; explicit i32.add is emitted.
define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}
1111
; Stores to integer-constant addresses: "i32.const 0" base, address in the
; offset immediate.
define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK:         .functype store_v8i16_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; NOTE(review): %p is unused here; the sibling test above takes no pointer
; argument. Dropping it would require regenerating the CHECK lines.
define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
; CHECK:         .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}
1142
; Stores to global addresses: the global's symbol is used as the offset
; immediate over an "i32.const 0" base.
define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK:         .functype store_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* @gv_v8i16
  ret void
}

define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
; CHECK:         .functype store_narrowing_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v , <8 x i8>* @gv_v8i8
  ret void
}
1171
1172; ==============================================================================
1173; 4 x i32
1174; ==============================================================================
; Basic v128 load through a pointer argument.
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32:
; CHECK:         .functype load_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %v
}

; Scalar i32 load splatted to all lanes selects v128.load32_splat.
define <4 x i32> @load_splat_v4i32(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32:
; CHECK:         .functype load_splat_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i32, i32* %addr, align 4
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Sign-extending load <4 x i16> -> <4 x i32> selects i32x4.load16x4_s.
define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32:
; CHECK:         .functype load_sext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Zero-extending variant; selects i32x4.load16x4_u.
define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32:
; CHECK:         .functype load_zext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Direct <4 x i16> load: the v128-illegal result is widened, so the
; zero-extending load i32x4.load16x4_u is still selected.
define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32:
; CHECK:         .functype load_ext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  ret <4 x i16> %v
}
1233
; "add nuw" cannot wrap unsigned, so the +16 folds into the load's offset
; immediate for all of the v4i32 load flavors below.
define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_offset:
; CHECK:         .functype load_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
; CHECK:         .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
; CHECK:         .functype load_sext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
; CHECK:         .functype load_zext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
; CHECK:         .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1307
; "inbounds" geps with positive indices fold into the offset immediate
; (16 bytes for <4 x i32>, 4 for i32, 8 for <4 x i16>).
define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1371
; Negative gep offsets are never folded (offset immediates are unsigned); an
; explicit i32.add of the negative constant is emitted.
define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1445
; "add nsw" (without nuw) may wrap unsigned, so the +16 is not folded; an
; explicit i32.add feeds the load for each flavor below.
define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; CHECK:         .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1529
; Geps without "inbounds" may wrap, so their offsets are not folded and an
; explicit i32.add is emitted.
define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1603
1604define <4 x i32> @load_v4i32_from_numeric_address() {
1605; CHECK-LABEL: load_v4i32_from_numeric_address:
1606; CHECK:         .functype load_v4i32_from_numeric_address () -> (v128)
1607; CHECK-NEXT:  # %bb.0:
1608; CHECK-NEXT:    i32.const 0
1609; CHECK-NEXT:    v128.load 32
1610; CHECK-NEXT:    # fallthrough-return
1611  %s = inttoptr i32 32 to <4 x i32>*
1612  %v = load <4 x i32>, <4 x i32>* %s
1613  ret <4 x i32> %v
1614}
1615
1616define <4 x i32> @load_splat_v4i32_from_numeric_address() {
1617; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
1618; CHECK:         .functype load_splat_v4i32_from_numeric_address () -> (v128)
1619; CHECK-NEXT:  # %bb.0:
1620; CHECK-NEXT:    i32.const 0
1621; CHECK-NEXT:    v128.load32_splat 32
1622; CHECK-NEXT:    # fallthrough-return
1623  %s = inttoptr i32 32 to i32*
1624  %e = load i32, i32* %s
1625  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1626  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1627  ret <4 x i32> %v2
1628}
1629
1630define <4 x i32> @load_sext_v4i32_from_numeric_address() {
1631; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
1632; CHECK:         .functype load_sext_v4i32_from_numeric_address () -> (v128)
1633; CHECK-NEXT:  # %bb.0:
1634; CHECK-NEXT:    i32.const 0
1635; CHECK-NEXT:    i32x4.load16x4_s 32
1636; CHECK-NEXT:    # fallthrough-return
1637  %s = inttoptr i32 32 to <4 x i16>*
1638  %v = load <4 x i16>, <4 x i16>* %s
1639  %v2 = sext <4 x i16> %v to <4 x i32>
1640  ret <4 x i32> %v2
1641}
1642
; A <4 x i16> load zero-extended to <4 x i32> selects the extending load
; i32x4.load16x4_u, with the constant address in the offset immediate.
define <4 x i32> @load_zext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
; CHECK:         .functype load_zext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1655
; Loading an illegal <4 x i16> with no explicit extension still emits the
; zero-extending load, since the result is widened to a legal v128.
define <4 x i16> @load_ext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
; CHECK:         .functype load_ext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1667
; A load from a global uses the symbol itself as the offset immediate over a
; zero base.
@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
define <4 x i32> @load_v4i32_from_global_address() {
; CHECK-LABEL: load_v4i32_from_global_address:
; CHECK:         .functype load_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* @gv_v4i32
  ret <4 x i32> %v
}
1679
; A splatted scalar load from a global selects v128.load32_splat with the
; symbol as the offset immediate.
@gv_i32 = global i32 42
define <4 x i32> @load_splat_v4i32_from_global_address() {
; CHECK-LABEL: load_splat_v4i32_from_global_address:
; CHECK:         .functype load_splat_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat gv_i32
; CHECK-NEXT:    # fallthrough-return
  %e = load i32, i32* @gv_i32
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}
1693
; A sign-extending vector load from a global uses the symbol as the offset
; immediate of i32x4.load16x4_s.
@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
define <4 x i32> @load_sext_v4i32_from_global_address() {
; CHECK-LABEL: load_sext_v4i32_from_global_address:
; CHECK:         .functype load_sext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_s gv_v4i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1706
; A zero-extending vector load from a global uses the symbol as the offset
; immediate of i32x4.load16x4_u.
define <4 x i32> @load_zext_v4i32_from_global_address() {
; CHECK-LABEL: load_zext_v4i32_from_global_address:
; CHECK:         .functype load_zext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1718
; Loading an illegal <4 x i16> from a global with no explicit extension
; still emits the zero-extending load with the symbol as the offset.
define <4 x i16> @load_ext_v4i32_from_global_address() {
; CHECK-LABEL: load_ext_v4i32_from_global_address:
; CHECK:         .functype load_ext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  ret <4 x i16> %v
}
1729
; Basic v128 store: push the address, then the value, offset immediate 0.
define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32:
; CHECK:         .functype store_v4i32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x i32> %v , <4 x i32>* %p
  ret void
}
1741
; Storing an illegal <4 x i16>: each lane is masked to 16 bits, narrowed
; with i16x8.narrow_i32x4_u, and the low 64 bits stored via i64.store.
define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32:
; CHECK:         .functype store_narrowing_v4i32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.narrow_i32x4_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x i16> %v , <4 x i16>* %p
  ret void
}
1758
; 'add nuw' proves the address computation cannot wrap, so the +16 folds
; into the store's offset immediate.
define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_folded_offset:
; CHECK:         .functype store_v4i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1773
; Narrowing <4 x i16> store with an 'add nuw' offset: the +16 folds into
; the i64.store offset immediate.
define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset:
; CHECK:         .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.narrow_i32x4_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  store <4 x i16> %v , <4 x i16>* %s
  ret void
}
1793
; An inbounds GEP offset is known not to wrap and folds into the store's
; offset immediate.
define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
; CHECK:         .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1806
; Narrowing store with an inbounds GEP: the 8-byte element offset folds
; into the i64.store offset immediate.
define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset:
; CHECK:         .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.narrow_i32x4_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  store <4 x i16> %v , <4 x i16>* %s
  ret void
}
1824
; Negative offsets cannot be encoded in the unsigned offset field, so an
; explicit i32.add of -16 is emitted even for an inbounds GEP.
define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1839
; Narrowing store with a negative GEP offset: the -8 stays as an explicit
; i32.add since the offset immediate is unsigned.
define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.narrow_i32x4_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  store <4 x i16> %v , <4 x i16>* %s
  ret void
}
1859
; 'add nsw' does not rule out unsigned wraparound, so the +16 is NOT folded
; and an explicit i32.add remains.
define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_offset:
; CHECK:         .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1876
; Narrowing store with an 'add nsw' offset: no nuw proof, so the +16 stays
; as an explicit i32.add.
define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset:
; CHECK:         .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.narrow_i32x4_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  store <4 x i16> %v , <4 x i16>* %s
  ret void
}
1898
; A GEP without inbounds may wrap, so the offset stays as an explicit add.
define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1913
; Narrowing store through a non-inbounds GEP: the +8 stays as an explicit
; i32.add rather than folding into the offset immediate.
define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.narrow_i32x4_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  store <4 x i16> %v , <4 x i16>* %s
  ret void
}
1933
; A store to a constant address folds the address into the offset immediate
; over an i32.const 0 base.
define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_numeric_address:
; CHECK:         .functype store_v4i32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1946
; Narrowing store to a constant address: the address is folded into the
; i64.store offset immediate.
define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
; CHECK:         .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.narrow_i32x4_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  store <4 x i16> %v , <4 x i16>* %s
  ret void
}
1964
; A store to a global uses the symbol as the offset immediate over a zero
; base.
define void @store_v4i32_to_global_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_global_address:
; CHECK:         .functype store_v4i32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4i32
; CHECK-NEXT:    # fallthrough-return
  store <4 x i32> %v , <4 x i32>* @gv_v4i32
  ret void
}
1976
; Narrowing store to a global: the symbol becomes the i64.store offset
; immediate.
define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
; CHECK:         .functype store_narrowing_v4i32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.narrow_i32x4_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store gv_v4i16
; CHECK-NEXT:    # fallthrough-return
  store <4 x i16> %v , <4 x i16>* @gv_v4i16
  ret void
}
1993
1994; ==============================================================================
1995; 2 x i64
1996; ==============================================================================
; Basic <2 x i64> load lowers to a plain v128.load with offset 0.
define <2 x i64> @load_v2i64(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64:
; CHECK:         .functype load_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i64>, <2 x i64>* %p
  ret <2 x i64> %v
}
2007
; A scalar i64 load broadcast to both lanes selects v128.load64_splat.
define <2 x i64> @load_splat_v2i64(i64* %p) {
; CHECK-LABEL: load_splat_v2i64:
; CHECK:         .functype load_splat_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i64, i64* %p
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2020
; A <2 x i32> load sign-extended to <2 x i64> selects i64x2.load32x2_s.
define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64:
; CHECK:         .functype load_sext_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* %p
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2032
; A <2 x i32> load zero-extended to <2 x i64> selects i64x2.load32x2_u.
define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64:
; CHECK:         .functype load_zext_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* %p
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2044
; Loading an illegal <2 x i32> with no explicit extension still emits the
; zero-extending load, since the result is widened to a legal v128.
define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64:
; CHECK:         .functype load_ext_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* %p
  ret <2 x i32> %v
}
2055
; 'add nuw' proves no wraparound, so the +16 folds into the load's offset
; immediate.
define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_offset:
; CHECK:         .functype load_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
2069
; Splat load with an 'add nuw' offset: the +16 folds into the
; v128.load64_splat offset immediate.
define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
; CHECK:         .functype load_splat_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2085
; Sign-extending load with an 'add nuw' offset: the +16 folds into the
; i64x2.load32x2_s offset immediate.
define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
; CHECK:         .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2100
; Zero-extending load with an 'add nuw' offset: the +16 folds into the
; i64x2.load32x2_u offset immediate.
define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
; CHECK:         .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2115
; Implicit extending load (illegal <2 x i32> result) with an 'add nuw'
; offset: the +16 folds into the offset immediate.
define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
; CHECK:         .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2129
; An inbounds GEP offset folds into the load's offset immediate.
define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
2141
; Splat load through an inbounds GEP: the 8-byte element offset folds into
; the v128.load64_splat offset immediate.
define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2155
; Sign-extending load through an inbounds GEP: the offset folds into the
; i64x2.load32x2_s offset immediate.
define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2168
; Zero-extending load through an inbounds GEP: the offset folds into the
; i64x2.load32x2_u offset immediate.
define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2181
; Implicit extending load through an inbounds GEP: the offset folds into
; the offset immediate.
define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2193
; Negative offsets cannot be encoded in the unsigned offset field, so an
; explicit i32.add of -16 is emitted even for an inbounds GEP.
define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
2207
; Splat load with a negative GEP offset: the -8 stays as an explicit
; i32.add since the offset immediate is unsigned.
define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 -1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2223
; Sign-extending load with a negative GEP offset: explicit i32.add of -8.
define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2238
; Zero-extending load with a negative GEP offset: explicit i32.add of -8.
define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2253
; Implicit extending load with a negative GEP offset: explicit i32.add of
; -8 before the extending load.
define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2267
; 'add nsw' does not rule out unsigned wraparound, so the +16 is NOT folded
; and an explicit i32.add remains.
define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_offset:
; CHECK:         .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
2283
; Splat load with an 'add nsw' offset: no nuw proof, so the +16 stays as an
; explicit i32.add.
define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2301
; Sign-extending load with an 'add nsw' offset: the +16 stays as an
; explicit i32.add.
define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2318
; Zero-extending load with an 'add nsw' offset: the +16 stays as an
; explicit i32.add.
define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2335
; Implicit extending load with an 'add nsw' offset: the +16 stays as an
; explicit i32.add.
define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2351
; A GEP without inbounds may wrap, so the offset stays as an explicit add.
define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
2365
; Splat load through a non-inbounds GEP: the +8 stays as an explicit
; i32.add.
define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i64, i64* %p, i32 1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2381
; Sign-extending load through a non-inbounds GEP: the +8 stays as an
; explicit i32.add.
define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2396
; Zero-extending load through a non-inbounds GEP: the +8 stays as an
; explicit i32.add.
define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2411
; Implicit extending load through a non-inbounds GEP: the +8 stays as an
; explicit i32.add.
define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2425
; A load from a constant address folds the address into the offset
; immediate over an i32.const 0 base.
define <2 x i64> @load_v2i64_from_numeric_address() {
; CHECK-LABEL: load_v2i64_from_numeric_address:
; CHECK:         .functype load_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
2437
; Splat load from a constant address: v128.load64_splat with the address as
; the offset immediate.
define <2 x i64> @load_splat_v2i64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
; CHECK:         .functype load_splat_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2451
2452define <2 x i64> @load_sext_v2i64_from_numeric_address() {
2453; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
2454; CHECK:         .functype load_sext_v2i64_from_numeric_address () -> (v128)
2455; CHECK-NEXT:  # %bb.0:
2456; CHECK-NEXT:    i32.const 0
2457; CHECK-NEXT:    i64x2.load32x2_s 32
2458; CHECK-NEXT:    # fallthrough-return
2459  %s = inttoptr i32 32 to <2 x i32>*
2460  %v = load <2 x i32>, <2 x i32>* %s
2461  %v2 = sext <2 x i32> %v to <2 x i64>
2462  ret <2 x i64> %v2
2463}
2464
2465define <2 x i64> @load_zext_v2i64_from_numeric_address() {
2466; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
2467; CHECK:         .functype load_zext_v2i64_from_numeric_address () -> (v128)
2468; CHECK-NEXT:  # %bb.0:
2469; CHECK-NEXT:    i32.const 0
2470; CHECK-NEXT:    i64x2.load32x2_u 32
2471; CHECK-NEXT:    # fallthrough-return
2472  %s = inttoptr i32 32 to <2 x i32>*
2473  %v = load <2 x i32>, <2 x i32>* %s
2474  %v2 = zext <2 x i32> %v to <2 x i64>
2475  ret <2 x i64> %v2
2476}
2477
2478define <2 x i32> @load_ext_v2i64_from_numeric_address() {
2479; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
2480; CHECK:         .functype load_ext_v2i64_from_numeric_address () -> (v128)
2481; CHECK-NEXT:  # %bb.0:
2482; CHECK-NEXT:    i32.const 0
2483; CHECK-NEXT:    i64x2.load32x2_u 32
2484; CHECK-NEXT:    # fallthrough-return
2485  %s = inttoptr i32 32 to <2 x i32>*
2486  %v = load <2 x i32>, <2 x i32>* %s
2487  ret <2 x i32> %v
2488}
2489
2490@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
2491define <2 x i64> @load_v2i64_from_global_address() {
2492; CHECK-LABEL: load_v2i64_from_global_address:
2493; CHECK:         .functype load_v2i64_from_global_address () -> (v128)
2494; CHECK-NEXT:  # %bb.0:
2495; CHECK-NEXT:    i32.const 0
2496; CHECK-NEXT:    v128.load gv_v2i64
2497; CHECK-NEXT:    # fallthrough-return
2498  %v = load <2 x i64>, <2 x i64>* @gv_v2i64
2499  ret <2 x i64> %v
2500}
2501
2502@gv_i64 = global i64 42
2503define <2 x i64> @load_splat_v2i64_from_global_address() {
2504; CHECK-LABEL: load_splat_v2i64_from_global_address:
2505; CHECK:         .functype load_splat_v2i64_from_global_address () -> (v128)
2506; CHECK-NEXT:  # %bb.0:
2507; CHECK-NEXT:    i32.const 0
2508; CHECK-NEXT:    v128.load64_splat gv_i64
2509; CHECK-NEXT:    # fallthrough-return
2510  %e = load i64, i64* @gv_i64
2511  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2512  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2513  ret <2 x i64> %v2
2514}
2515
2516@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
2517define <2 x i64> @load_sext_v2i64_from_global_address() {
2518; CHECK-LABEL: load_sext_v2i64_from_global_address:
2519; CHECK:         .functype load_sext_v2i64_from_global_address () -> (v128)
2520; CHECK-NEXT:  # %bb.0:
2521; CHECK-NEXT:    i32.const 0
2522; CHECK-NEXT:    i64x2.load32x2_s gv_v2i32
2523; CHECK-NEXT:    # fallthrough-return
2524  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2525  %v2 = sext <2 x i32> %v to <2 x i64>
2526  ret <2 x i64> %v2
2527}
2528
2529define <2 x i64> @load_zext_v2i64_from_global_address() {
2530; CHECK-LABEL: load_zext_v2i64_from_global_address:
2531; CHECK:         .functype load_zext_v2i64_from_global_address () -> (v128)
2532; CHECK-NEXT:  # %bb.0:
2533; CHECK-NEXT:    i32.const 0
2534; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
2535; CHECK-NEXT:    # fallthrough-return
2536  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2537  %v2 = zext <2 x i32> %v to <2 x i64>
2538  ret <2 x i64> %v2
2539}
2540
2541define <2 x i32> @load_ext_v2i64_from_global_address() {
2542; CHECK-LABEL: load_ext_v2i64_from_global_address:
2543; CHECK:         .functype load_ext_v2i64_from_global_address () -> (v128)
2544; CHECK-NEXT:  # %bb.0:
2545; CHECK-NEXT:    i32.const 0
2546; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
2547; CHECK-NEXT:    # fallthrough-return
2548  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2549  ret <2 x i32> %v
2550}
2551
; Basic v128 store through a pointer argument.
define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64:
; CHECK:         .functype store_v2i64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* %p
  ret void
}

; `add nuw` guarantees no unsigned wraparound, so the +16 folds into the
; store's offset immediate.
define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_offset:
; CHECK:         .functype store_v2i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; An `inbounds` gep offset likewise folds into the offset immediate.
define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; CHECK:         .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; Negative offsets can never fold (offset immediates are unsigned), so an
; explicit i32.add is emitted even with `inbounds`.
define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; `add nsw` does not rule out unsigned wraparound, so the offset is not
; folded and an explicit i32.add is emitted.
define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; CHECK:         .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; A plain (non-inbounds) gep may wrap, so its offset is not folded either.
define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; A constant inttoptr address folds into the store's offset immediate.
define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_numeric_address:
; CHECK:         .functype store_v2i64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; A global's address folds into the offset immediate as a symbol relocation.
define void @store_v2i64_to_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_global_address:
; CHECK:         .functype store_v2i64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* @gv_v2i64
  ret void
}
2663
2664; ==============================================================================
2665; 4 x float
2666; ==============================================================================
; Basic v128 load of a <4 x float>.
define <4 x float> @load_v4f32(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32:
; CHECK:         .functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* %p
  ret <4 x float> %v
}

; Scalar load + insert + shuffle-to-all-lanes selects to v128.load32_splat.
define <4 x float> @load_splat_v4f32(float* %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK:         .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* %p
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; `add nuw` guarantees no unsigned wrap, so +16 folds into the immediate.
define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK:         .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

; Same folding applies to the splatting load.
define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; `inbounds` gep offsets fold into the immediate.
define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

; Splat via inbounds gep; the element stride (4 bytes) is the immediate.
define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; Negative offsets never fold (immediates are unsigned): explicit i32.add.
define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

; Same for the splatting load with a negative element offset.
define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 -1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; `add nsw` does not rule out unsigned wrap, so the offset is not folded.
define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK:         .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

; Same nsw-add case for the splatting load.
define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; A plain (non-inbounds) gep may wrap, so its offset is not folded.
define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

; Same non-inbounds gep case for the splatting load.
define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; A constant inttoptr address folds into the load's offset immediate.
define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK:         .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

; Splat load from a constant address.
define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK:         .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; A global's address folds into the offset immediate as a symbol relocation.
@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK:         .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %v
}

; Splat load with a global symbol folded into the offset immediate.
@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK:         .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat gv_f32
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* @gv_f32
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2892
; Basic v128 store of a <4 x float>.
define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK:         .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* %p
  ret void
}

; `add nuw` guarantees no unsigned wrap, so +16 folds into the immediate.
define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK:         .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; `inbounds` gep offsets fold into the immediate.
define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; Negative offsets never fold (immediates are unsigned): explicit i32.add.
define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; `add nsw` does not rule out unsigned wrap, so the offset is not folded.
define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; A plain (non-inbounds) gep may wrap, so its offset is not folded.
define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; A constant inttoptr address folds into the store's offset immediate.
define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; A global's address folds into the offset immediate as a symbol relocation.
define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* @gv_v4f32
  ret void
}
3004
3005; ==============================================================================
3006; 2 x double
3007; ==============================================================================
; Basic v128 load of a <2 x double>.
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK:         .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* %p
  ret <2 x double> %v
}

; Scalar load + insert + shuffle-to-all-lanes selects to v128.load64_splat.
define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* %p
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; `add nuw` guarantees no unsigned wrap, so +16 folds into the immediate.
define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

; Same folding applies to the splatting load.
define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; `inbounds` gep offsets fold into the immediate.
define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

; Splat via inbounds gep; the element stride (8 bytes) is the immediate.
define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; Negative offsets never fold (immediates are unsigned): explicit i32.add.
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

; Same for the splatting load with a negative element offset.
define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 -1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; `add nsw` does not rule out unsigned wrap, so the offset is not folded.
define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

; Same nsw-add case for the splatting load.
define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; A plain (non-inbounds) gep may wrap, so its offset is not folded.
define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

; Same non-inbounds gep case for the splatting load.
define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; A constant inttoptr address folds into the load's offset immediate.
define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

; Splat load from a constant address.
define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; A global's address folds into the offset immediate as a symbol relocation.
@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %v
}

; Splat load with a global symbol folded into the offset immediate.
@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat gv_f64
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3233
; Baseline case: a plain v128 store through a pointer argument should become
; a single v128.store with offset immediate 0.
define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK:         .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* %p
  ret void
}
3245
; An address computed as ptrtoint + `add nuw` + inttoptr should have the
; constant 16 folded into the store's offset immediate: nuw guarantees the
; unsigned addition cannot wrap, matching wasm's unsigned offset semantics.
define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}
3260
; A `getelementptr inbounds` of +1 element (16 bytes for <2 x double>) should
; be folded into the store's offset immediate; inbounds provides the
; no-overflow guarantee that makes the fold legal.
define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}
3273
; A negative GEP offset must NOT be folded: wasm memory offset immediates are
; unsigned, so -16 is materialized with an explicit i32.const/i32.add and the
; store keeps offset immediate 0.
define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v , <2 x double>* %s
  ret void
}
3288
; Unlike the folded-offset case above, this add is only `nsw` (not `nuw`), so
; the unsigned address computation may wrap and the constant 16 must NOT be
; folded: an explicit i32.add is expected and the store keeps offset 0.
define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}
3305
; Unlike the folded-gep case above, this GEP lacks `inbounds`, so there is no
; no-wrap guarantee and the 16-byte offset must NOT be folded: an explicit
; i32.add is expected and the store keeps offset immediate 0.
define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}
3320
; A store to a constant numeric address (inttoptr 32) should use an
; i32.const 0 base with the address folded into the offset immediate 32.
define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}
3333
; A store to a global's address should be selected as a single v128.store
; with the symbol folded into the offset immediate (base i32.const 0).
define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* @gv_v2f64
  ret void
}
3345