1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
3
4; Test SIMD loads and stores
5
6target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
7target triple = "wasm32-unknown-unknown"
8
9; ==============================================================================
10; 16 x i8
11; ==============================================================================
; Baseline: a plain v128 load straight from the pointer argument, offset 0.
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %v
}

; A scalar i8 load followed by insertelement + all-zeros shufflevector (the
; canonical splat pattern) should select the combined v128.load8_splat.
define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; A constant `add nuw` to the address is folded into the load's unsigned
; offset immediate (nuw guarantees the effective address does not wrap).
define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}
49
; Splat variant of the folded-offset case: the `add nuw` constant is folded
; into the load8_splat offset immediate.
define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; A constant `getelementptr inbounds` offset (1 vector = 16 bytes) is folded
; into the load's offset immediate.
define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; Splat variant: the inbounds GEP over i8 folds as a 1-byte offset immediate.
define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 1
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
91
; A negative GEP offset cannot be encoded in the unsigned offset field, so an
; explicit i32.add must be emitted instead of folding.
define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; Splat variant of the negative-offset case: explicit address arithmetic, no
; folding into load8_splat's offset immediate.
define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
121
; `add nsw` (rather than nuw) does not prove the unsigned address cannot wrap,
; so the constant is NOT folded into the offset immediate.
define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; Splat variant of the nsw-offset case: no folding, explicit i32.add.
define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
155
; A GEP without `inbounds` may wrap, so its offset is NOT folded into the
; load's offset immediate; an explicit i32.add is emitted.
define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; Splat variant of the non-inbounds GEP case: explicit address arithmetic.
define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
185
; A constant absolute address is encoded as base `i32.const 0` with the whole
; address carried in the offset immediate.
define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; Splat variant of the constant-address case.
define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
211
; Loading from a global: the symbol itself appears as the offset immediate
; (relocated at link time) over a zero base.
@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %v
}

; Splat variant of the global-address case.
@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat gv_i8
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
237
; Baseline v128 store: address operand first, value second, offset 0.
define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* %p
  ret void
}

; A constant `add nuw` is folded into the store's offset immediate, mirroring
; the load case.
define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; An inbounds GEP constant offset is folded into the store's offset immediate.
define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}
277
; Negative GEP offsets cannot use the unsigned offset field: expect an
; explicit i32.add before the store.
define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; `add nsw` does not prove no unsigned wrap, so the offset is not folded.
define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; A GEP without `inbounds` is likewise not folded into the store offset.
define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}
324
; Storing to a constant absolute address: zero base, address in the offset
; immediate.
define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; Storing to a global: the symbol is emitted as the offset immediate.
define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* @gv_v16i8
  ret void
}
349
350; ==============================================================================
351; 8 x i16
352; ==============================================================================
; Baseline v128 load for the 8 x i16 lane interpretation.
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16:
; CHECK:         .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %v
}

; Scalar i16 load + splat shuffle selects v128.load16_splat.
define <8 x i16> @load_splat_v8i16(i16* %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; <8 x i8> load + sext selects the widening load i16x8.load8x8_s.
define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; <8 x i8> load + zext selects the widening load i16x8.load8x8_u.
define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; A bare <8 x i8> load (no explicit extension) is still materialized in a
; v128 via the unsigned widening load.
define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %v
}
411
; `add nuw` constant offsets fold into the offset immediate for v8i16 loads,
; splats, and widening loads alike.
define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; Folded offset, splat form.
define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Folded offset, sign-extending widening load.
define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Folded offset, zero-extending widening load.
define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Folded offset, bare <8 x i8> load (implicit widening).
define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
485
; Inbounds GEP constant offsets fold into the offset immediate; note the byte
; offset scales with the element type (16, 2, and 8 below).
define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; Folded GEP offset, splat form (1 x i16 = 2 bytes).
define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 2
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Folded GEP offset, sign-extending widening load (1 x <8 x i8> = 8 bytes).
define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Folded GEP offset, zero-extending widening load.
define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Folded GEP offset, bare <8 x i8> load (implicit widening).
define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
549
; Negative GEP offsets cannot use the unsigned offset field, so all v8i16
; load flavors below emit an explicit i32.add.
define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; Negative offset, splat form.
define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Negative offset, sign-extending widening load.
define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Negative offset, zero-extending widening load.
define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Negative offset, bare <8 x i8> load (implicit widening).
define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
623
; `add nsw` offsets (no nuw) are not folded for any v8i16 load flavor; each
; case below keeps an explicit i32.add.
define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; Unfolded nsw offset, splat form.
define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Unfolded nsw offset, sign-extending widening load.
define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Unfolded nsw offset, zero-extending widening load.
define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Unfolded nsw offset, bare <8 x i8> load (implicit widening).
define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
707
; GEPs without `inbounds` are not folded for any v8i16 load flavor; each case
; below keeps an explicit i32.add.
define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; Non-inbounds GEP, splat form.
define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Non-inbounds GEP, sign-extending widening load.
define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Non-inbounds GEP, zero-extending widening load.
define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Non-inbounds GEP, bare <8 x i8> load (implicit widening).
define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
781
; Constant absolute addresses become a zero base with the address in the
; offset immediate, for every v8i16 load flavor.
define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; Constant address, splat form.
define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Constant address, sign-extending widening load.
define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Constant address, zero-extending widening load.
define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Constant address, bare <8 x i8> load (implicit widening).
define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
845
; Loads from global addresses fold the global symbol into the offset field of
; the load (emitted as a relocatable offset, e.g. "v128.load gv_v8i16").
@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %v
}

; Splat load of a scalar global: v128.load16_splat with the symbol as offset.
@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Signed extending load of a <8 x i8> global: i16x8.load8x8_s with the symbol
; as offset.
@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Unsigned extending load of a <8 x i8> global.
define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Bare <8 x i8> load of a global (implicit widening): selected as the unsigned
; extending load.
define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %v
}
907
908
; Plain <8 x i16> store: a single v128.store with offset 0.
define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* %p
  ret void
}

; Narrowing <8 x i8> store: there is no 64-bit SIMD store, so the lanes are
; masked to 8 bits, narrowed with i8x16.narrow_i16x8_u, and the low 64 bits
; are written with a scalar i64.store.
define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16:
; CHECK:         .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v, <8 x i8>* %p
  ret void
}
937
; An address computed with "add nuw" cannot wrap, so the +16 is folded into
; the store's unsigned offset immediate.
define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; Same nuw-offset folding for the narrowing store path: the +16 lands in the
; i64.store offset immediate.
define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

; An inbounds GEP with a positive index likewise folds into the offset
; immediate (1 element of <8 x i16> = 16 bytes).
define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; Inbounds-GEP folding for the narrowing store (1 element of <8 x i8> =
; 8 bytes, hence offset 8 on the i64.store).
define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}
1003
; A negative offset cannot be encoded in the unsigned offset field, so the
; address is materialized with an explicit i32.add of -16.
define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; Negative-offset case for the narrowing store: explicit i32.add of -8.
define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

; "add nsw" (no nuw) may wrap the unsigned address, so the +16 is NOT folded
; into the offset immediate; an explicit i32.add is emitted instead.
define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK:         .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; nsw-only offset stays unfolded on the narrowing store path as well.
define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}
1077
; A GEP without "inbounds" may wrap, so its offset is not folded; an explicit
; i32.add computes the address.
define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; Non-inbounds GEP stays unfolded for the narrowing store too.
define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}
1112
; A store to a constant address folds the address into the offset immediate
; (base becomes i32.const 0).
define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK:         .functype store_v8i16_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; Constant-address folding on the narrowing store path (note the unused %p
; parameter is kept so the .functype still takes (v128, i32)).
define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) {
; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
; CHECK:         .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  store <8 x i8> %v , <8 x i8>* %s
  ret void
}

; A store to a global folds the symbol into the offset field
; ("v128.store gv_v8i16").
define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK:         .functype store_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* @gv_v8i16
  ret void
}

; Global-symbol folding on the narrowing store path (i64.store gv_v8i8).
define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
; CHECK:         .functype store_narrowing_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.narrow_i16x8_u
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    i64.store gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v , <8 x i8>* @gv_v8i8
  ret void
}
1172
1173; ==============================================================================
1174; 4 x i32
1175; ==============================================================================
; Plain <4 x i32> load: a single v128.load with offset 0.
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32:
; CHECK:         .functype load_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %v
}

; Scalar i32 load splatted to all lanes selects v128.load32_splat.
define <4 x i32> @load_splat_v4i32(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32:
; CHECK:         .functype load_splat_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i32, i32* %addr, align 4
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; <4 x i16> load + sext selects the signed extending load i32x4.load16x4_s.
define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32:
; CHECK:         .functype load_sext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; <4 x i16> load + zext selects the unsigned extending load i32x4.load16x4_u.
define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32:
; CHECK:         .functype load_zext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Bare <4 x i16> load (implicit widening to v128): selected as the unsigned
; extending load since the high lane contents are not observed.
define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32:
; CHECK:         .functype load_ext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  ret <4 x i16> %v
}
1234
; "add nuw" cannot wrap, so the +16 folds into the load's offset immediate.
define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_offset:
; CHECK:         .functype load_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; nuw-offset folding for the splat load.
define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
; CHECK:         .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; nuw-offset folding for the signed extending load.
define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
; CHECK:         .functype load_sext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; nuw-offset folding for the unsigned extending load.
define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
; CHECK:         .functype load_zext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; nuw-offset folding for the implicit-widening (anyext-style) load.
define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
; CHECK:         .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1308
; An inbounds GEP with a positive index folds into the offset immediate
; (1 element of <4 x i32> = 16 bytes).
define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; Inbounds-GEP folding for the splat load (1 x i32 = 4 bytes).
define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Inbounds-GEP folding for the signed extending load (1 x <4 x i16> = 8 bytes).
define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Inbounds-GEP folding for the unsigned extending load.
define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Inbounds-GEP folding for the implicit-widening load.
define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1372
; Negative offsets cannot be encoded in the unsigned offset field, so the
; address is computed with an explicit i32.add of -16.
define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; Negative-offset case for the splat load (explicit i32.add of -4).
define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Negative-offset case for the signed extending load (explicit -8).
define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Negative-offset case for the unsigned extending load.
define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Negative-offset case for the implicit-widening load.
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1446
; "add nsw" (no nuw) may wrap the unsigned address, so the +16 is NOT folded
; into the offset immediate; an explicit i32.add is emitted instead.
define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; CHECK:         .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; nsw-only offset stays unfolded for the splat load.
define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; nsw-only offset stays unfolded for the signed extending load.
define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; nsw-only offset stays unfolded for the unsigned extending load.
define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; nsw-only offset stays unfolded for the implicit-widening load.
define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1530
; A GEP without "inbounds" may wrap, so its offset is not folded; an explicit
; i32.add computes the address.
define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; Non-inbounds GEP stays unfolded for the splat load.
define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Non-inbounds GEP stays unfolded for the signed extending load.
define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Non-inbounds GEP stays unfolded for the unsigned extending load.
define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Non-inbounds GEP stays unfolded for the implicit-widening load.
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1604
; A load from a constant address folds the address into the offset immediate
; (base operand becomes i32.const 0).
define <4 x i32> @load_v4i32_from_numeric_address() {
; CHECK-LABEL: load_v4i32_from_numeric_address:
; CHECK:         .functype load_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; Constant-address folding for the splat load.
define <4 x i32> @load_splat_v4i32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
; CHECK:         .functype load_splat_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Constant-address folding for the signed extending load.
define <4 x i32> @load_sext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
; CHECK:         .functype load_sext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Constant-address folding for the unsigned extending load.
define <4 x i32> @load_zext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
; CHECK:         .functype load_zext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Constant-address folding for the implicit-widening load.
define <4 x i16> @load_ext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
; CHECK:         .functype load_ext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1668
1669@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
1670define <4 x i32> @load_v4i32_from_global_address() {
1671; CHECK-LABEL: load_v4i32_from_global_address:
1672; CHECK:         .functype load_v4i32_from_global_address () -> (v128)
1673; CHECK-NEXT:  # %bb.0:
1674; CHECK-NEXT:    i32.const 0
1675; CHECK-NEXT:    v128.load gv_v4i32
1676; CHECK-NEXT:    # fallthrough-return
1677  %v = load <4 x i32>, <4 x i32>* @gv_v4i32
1678  ret <4 x i32> %v
1679}
1680
1681@gv_i32 = global i32 42
1682define <4 x i32> @load_splat_v4i32_from_global_address() {
1683; CHECK-LABEL: load_splat_v4i32_from_global_address:
1684; CHECK:         .functype load_splat_v4i32_from_global_address () -> (v128)
1685; CHECK-NEXT:  # %bb.0:
1686; CHECK-NEXT:    i32.const 0
1687; CHECK-NEXT:    v128.load32_splat gv_i32
1688; CHECK-NEXT:    # fallthrough-return
1689  %e = load i32, i32* @gv_i32
1690  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1691  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1692  ret <4 x i32> %v2
1693}
1694
1695@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
1696define <4 x i32> @load_sext_v4i32_from_global_address() {
1697; CHECK-LABEL: load_sext_v4i32_from_global_address:
1698; CHECK:         .functype load_sext_v4i32_from_global_address () -> (v128)
1699; CHECK-NEXT:  # %bb.0:
1700; CHECK-NEXT:    i32.const 0
1701; CHECK-NEXT:    i32x4.load16x4_s gv_v4i16
1702; CHECK-NEXT:    # fallthrough-return
1703  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1704  %v2 = sext <4 x i16> %v to <4 x i32>
1705  ret <4 x i32> %v2
1706}
1707
1708define <4 x i32> @load_zext_v4i32_from_global_address() {
1709; CHECK-LABEL: load_zext_v4i32_from_global_address:
1710; CHECK:         .functype load_zext_v4i32_from_global_address () -> (v128)
1711; CHECK-NEXT:  # %bb.0:
1712; CHECK-NEXT:    i32.const 0
1713; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
1714; CHECK-NEXT:    # fallthrough-return
1715  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1716  %v2 = zext <4 x i16> %v to <4 x i32>
1717  ret <4 x i32> %v2
1718}
1719
1720define <4 x i16> @load_ext_v4i32_from_global_address() {
1721; CHECK-LABEL: load_ext_v4i32_from_global_address:
1722; CHECK:         .functype load_ext_v4i32_from_global_address () -> (v128)
1723; CHECK-NEXT:  # %bb.0:
1724; CHECK-NEXT:    i32.const 0
1725; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
1726; CHECK-NEXT:    # fallthrough-return
1727  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1728  ret <4 x i16> %v
1729}
1730
1731define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
1732; CHECK-LABEL: store_v4i32:
1733; CHECK:         .functype store_v4i32 (v128, i32) -> ()
1734; CHECK-NEXT:  # %bb.0:
1735; CHECK-NEXT:    local.get 1
1736; CHECK-NEXT:    local.get 0
1737; CHECK-NEXT:    v128.store 0
1738; CHECK-NEXT:    # fallthrough-return
1739  store <4 x i32> %v , <4 x i32>* %p
1740  ret void
1741}
1742
1743define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
1744; CHECK-LABEL: store_narrowing_v4i32:
1745; CHECK:         .functype store_narrowing_v4i32 (v128, i32) -> ()
1746; CHECK-NEXT:  # %bb.0:
1747; CHECK-NEXT:    local.get 1
1748; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
1749; CHECK-NEXT:    local.get 0
1750; CHECK-NEXT:    v128.and
1751; CHECK-NEXT:    local.get 0
1752; CHECK-NEXT:    i16x8.narrow_i32x4_u
1753; CHECK-NEXT:    i64x2.extract_lane 0
1754; CHECK-NEXT:    i64.store 0
1755; CHECK-NEXT:    # fallthrough-return
1756  store <4 x i16> %v , <4 x i16>* %p
1757  ret void
1758}
1759
1760define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
1761; CHECK-LABEL: store_v4i32_with_folded_offset:
1762; CHECK:         .functype store_v4i32_with_folded_offset (v128, i32) -> ()
1763; CHECK-NEXT:  # %bb.0:
1764; CHECK-NEXT:    local.get 1
1765; CHECK-NEXT:    local.get 0
1766; CHECK-NEXT:    v128.store 16
1767; CHECK-NEXT:    # fallthrough-return
1768  %q = ptrtoint <4 x i32>* %p to i32
1769  %r = add nuw i32 %q, 16
1770  %s = inttoptr i32 %r to <4 x i32>*
1771  store <4 x i32> %v , <4 x i32>* %s
1772  ret void
1773}
1774
1775define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) {
1776; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset:
1777; CHECK:         .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
1778; CHECK-NEXT:  # %bb.0:
1779; CHECK-NEXT:    local.get 1
1780; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
1781; CHECK-NEXT:    local.get 0
1782; CHECK-NEXT:    v128.and
1783; CHECK-NEXT:    local.get 0
1784; CHECK-NEXT:    i16x8.narrow_i32x4_u
1785; CHECK-NEXT:    i64x2.extract_lane 0
1786; CHECK-NEXT:    i64.store 16
1787; CHECK-NEXT:    # fallthrough-return
1788  %q = ptrtoint <4 x i16>* %p to i32
1789  %r = add nuw i32 %q, 16
1790  %s = inttoptr i32 %r to <4 x i16>*
1791  store <4 x i16> %v , <4 x i16>* %s
1792  ret void
1793}
1794
1795define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1796; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
1797; CHECK:         .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
1798; CHECK-NEXT:  # %bb.0:
1799; CHECK-NEXT:    local.get 1
1800; CHECK-NEXT:    local.get 0
1801; CHECK-NEXT:    v128.store 16
1802; CHECK-NEXT:    # fallthrough-return
1803  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
1804  store <4 x i32> %v , <4 x i32>* %s
1805  ret void
1806}
1807
1808define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
1809; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset:
1810; CHECK:         .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
1811; CHECK-NEXT:  # %bb.0:
1812; CHECK-NEXT:    local.get 1
1813; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
1814; CHECK-NEXT:    local.get 0
1815; CHECK-NEXT:    v128.and
1816; CHECK-NEXT:    local.get 0
1817; CHECK-NEXT:    i16x8.narrow_i32x4_u
1818; CHECK-NEXT:    i64x2.extract_lane 0
1819; CHECK-NEXT:    i64.store 8
1820; CHECK-NEXT:    # fallthrough-return
1821  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1822  store <4 x i16> %v , <4 x i16>* %s
1823  ret void
1824}
1825
1826define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
1827; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
1828; CHECK:         .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
1829; CHECK-NEXT:  # %bb.0:
1830; CHECK-NEXT:    local.get 1
1831; CHECK-NEXT:    i32.const -16
1832; CHECK-NEXT:    i32.add
1833; CHECK-NEXT:    local.get 0
1834; CHECK-NEXT:    v128.store 0
1835; CHECK-NEXT:    # fallthrough-return
1836  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1837  store <4 x i32> %v , <4 x i32>* %s
1838  ret void
1839}
1840
1841define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) {
1842; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset:
1843; CHECK:         .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
1844; CHECK-NEXT:  # %bb.0:
1845; CHECK-NEXT:    local.get 1
1846; CHECK-NEXT:    i32.const -8
1847; CHECK-NEXT:    i32.add
1848; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
1849; CHECK-NEXT:    local.get 0
1850; CHECK-NEXT:    v128.and
1851; CHECK-NEXT:    local.get 0
1852; CHECK-NEXT:    i16x8.narrow_i32x4_u
1853; CHECK-NEXT:    i64x2.extract_lane 0
1854; CHECK-NEXT:    i64.store 0
1855; CHECK-NEXT:    # fallthrough-return
1856  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1857  store <4 x i16> %v , <4 x i16>* %s
1858  ret void
1859}
1860
1861define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
1862; CHECK-LABEL: store_v4i32_with_unfolded_offset:
1863; CHECK:         .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
1864; CHECK-NEXT:  # %bb.0:
1865; CHECK-NEXT:    local.get 1
1866; CHECK-NEXT:    i32.const 16
1867; CHECK-NEXT:    i32.add
1868; CHECK-NEXT:    local.get 0
1869; CHECK-NEXT:    v128.store 0
1870; CHECK-NEXT:    # fallthrough-return
1871  %q = ptrtoint <4 x i32>* %p to i32
1872  %r = add nsw i32 %q, 16
1873  %s = inttoptr i32 %r to <4 x i32>*
1874  store <4 x i32> %v , <4 x i32>* %s
1875  ret void
1876}
1877
1878define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) {
1879; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset:
1880; CHECK:         .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> ()
1881; CHECK-NEXT:  # %bb.0:
1882; CHECK-NEXT:    local.get 1
1883; CHECK-NEXT:    i32.const 16
1884; CHECK-NEXT:    i32.add
1885; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
1886; CHECK-NEXT:    local.get 0
1887; CHECK-NEXT:    v128.and
1888; CHECK-NEXT:    local.get 0
1889; CHECK-NEXT:    i16x8.narrow_i32x4_u
1890; CHECK-NEXT:    i64x2.extract_lane 0
1891; CHECK-NEXT:    i64.store 0
1892; CHECK-NEXT:    # fallthrough-return
1893  %q = ptrtoint <4 x i16>* %p to i32
1894  %r = add nsw i32 %q, 16
1895  %s = inttoptr i32 %r to <4 x i16>*
1896  store <4 x i16> %v , <4 x i16>* %s
1897  ret void
1898}
1899
1900define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1901; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
1902; CHECK:         .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
1903; CHECK-NEXT:  # %bb.0:
1904; CHECK-NEXT:    local.get 1
1905; CHECK-NEXT:    i32.const 16
1906; CHECK-NEXT:    i32.add
1907; CHECK-NEXT:    local.get 0
1908; CHECK-NEXT:    v128.store 0
1909; CHECK-NEXT:    # fallthrough-return
1910  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
1911  store <4 x i32> %v , <4 x i32>* %s
1912  ret void
1913}
1914
1915define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
1916; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset:
1917; CHECK:         .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
1918; CHECK-NEXT:  # %bb.0:
1919; CHECK-NEXT:    local.get 1
1920; CHECK-NEXT:    i32.const 8
1921; CHECK-NEXT:    i32.add
1922; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
1923; CHECK-NEXT:    local.get 0
1924; CHECK-NEXT:    v128.and
1925; CHECK-NEXT:    local.get 0
1926; CHECK-NEXT:    i16x8.narrow_i32x4_u
1927; CHECK-NEXT:    i64x2.extract_lane 0
1928; CHECK-NEXT:    i64.store 0
1929; CHECK-NEXT:    # fallthrough-return
1930  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1931  store <4 x i16> %v , <4 x i16>* %s
1932  ret void
1933}
1934
1935define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
1936; CHECK-LABEL: store_v4i32_to_numeric_address:
1937; CHECK:         .functype store_v4i32_to_numeric_address (v128) -> ()
1938; CHECK-NEXT:  # %bb.0:
1939; CHECK-NEXT:    i32.const 0
1940; CHECK-NEXT:    local.get 0
1941; CHECK-NEXT:    v128.store 32
1942; CHECK-NEXT:    # fallthrough-return
1943  %s = inttoptr i32 32 to <4 x i32>*
1944  store <4 x i32> %v , <4 x i32>* %s
1945  ret void
1946}
1947
1948define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
1949; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
1950; CHECK:         .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
1951; CHECK-NEXT:  # %bb.0:
1952; CHECK-NEXT:    i32.const 0
1953; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
1954; CHECK-NEXT:    local.get 0
1955; CHECK-NEXT:    v128.and
1956; CHECK-NEXT:    local.get 0
1957; CHECK-NEXT:    i16x8.narrow_i32x4_u
1958; CHECK-NEXT:    i64x2.extract_lane 0
1959; CHECK-NEXT:    i64.store 32
1960; CHECK-NEXT:    # fallthrough-return
1961  %s = inttoptr i32 32 to <4 x i16>*
1962  store <4 x i16> %v , <4 x i16>* %s
1963  ret void
1964}
1965
1966define void @store_v4i32_to_global_address(<4 x i32> %v) {
1967; CHECK-LABEL: store_v4i32_to_global_address:
1968; CHECK:         .functype store_v4i32_to_global_address (v128) -> ()
1969; CHECK-NEXT:  # %bb.0:
1970; CHECK-NEXT:    i32.const 0
1971; CHECK-NEXT:    local.get 0
1972; CHECK-NEXT:    v128.store gv_v4i32
1973; CHECK-NEXT:    # fallthrough-return
1974  store <4 x i32> %v , <4 x i32>* @gv_v4i32
1975  ret void
1976}
1977
1978define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
1979; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
1980; CHECK:         .functype store_narrowing_v4i32_to_global_address (v128) -> ()
1981; CHECK-NEXT:  # %bb.0:
1982; CHECK-NEXT:    i32.const 0
1983; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
1984; CHECK-NEXT:    local.get 0
1985; CHECK-NEXT:    v128.and
1986; CHECK-NEXT:    local.get 0
1987; CHECK-NEXT:    i16x8.narrow_i32x4_u
1988; CHECK-NEXT:    i64x2.extract_lane 0
1989; CHECK-NEXT:    i64.store gv_v4i16
1990; CHECK-NEXT:    # fallthrough-return
1991  store <4 x i16> %v , <4 x i16>* @gv_v4i16
1992  ret void
1993}
1994
1995; ==============================================================================
1996; 2 x i64
1997; ==============================================================================
1998define <2 x i64> @load_v2i64(<2 x i64>* %p) {
1999; CHECK-LABEL: load_v2i64:
2000; CHECK:         .functype load_v2i64 (i32) -> (v128)
2001; CHECK-NEXT:  # %bb.0:
2002; CHECK-NEXT:    local.get 0
2003; CHECK-NEXT:    v128.load 0
2004; CHECK-NEXT:    # fallthrough-return
2005  %v = load <2 x i64>, <2 x i64>* %p
2006  ret <2 x i64> %v
2007}
2008
2009define <2 x i64> @load_splat_v2i64(i64* %p) {
2010; CHECK-LABEL: load_splat_v2i64:
2011; CHECK:         .functype load_splat_v2i64 (i32) -> (v128)
2012; CHECK-NEXT:  # %bb.0:
2013; CHECK-NEXT:    local.get 0
2014; CHECK-NEXT:    v128.load64_splat 0
2015; CHECK-NEXT:    # fallthrough-return
2016  %e = load i64, i64* %p
2017  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2018  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2019  ret <2 x i64> %v2
2020}
2021
2022define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
2023; CHECK-LABEL: load_sext_v2i64:
2024; CHECK:         .functype load_sext_v2i64 (i32) -> (v128)
2025; CHECK-NEXT:  # %bb.0:
2026; CHECK-NEXT:    local.get 0
2027; CHECK-NEXT:    i64x2.load32x2_s 0
2028; CHECK-NEXT:    # fallthrough-return
2029  %v = load <2 x i32>, <2 x i32>* %p
2030  %v2 = sext <2 x i32> %v to <2 x i64>
2031  ret <2 x i64> %v2
2032}
2033
2034define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
2035; CHECK-LABEL: load_zext_v2i64:
2036; CHECK:         .functype load_zext_v2i64 (i32) -> (v128)
2037; CHECK-NEXT:  # %bb.0:
2038; CHECK-NEXT:    local.get 0
2039; CHECK-NEXT:    i64x2.load32x2_u 0
2040; CHECK-NEXT:    # fallthrough-return
2041  %v = load <2 x i32>, <2 x i32>* %p
2042  %v2 = zext <2 x i32> %v to <2 x i64>
2043  ret <2 x i64> %v2
2044}
2045
2046define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
2047; CHECK-LABEL: load_ext_v2i64:
2048; CHECK:         .functype load_ext_v2i64 (i32) -> (v128)
2049; CHECK-NEXT:  # %bb.0:
2050; CHECK-NEXT:    local.get 0
2051; CHECK-NEXT:    i64x2.load32x2_u 0
2052; CHECK-NEXT:    # fallthrough-return
2053  %v = load <2 x i32>, <2 x i32>* %p
2054  ret <2 x i32> %v
2055}
2056
2057define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
2058; CHECK-LABEL: load_v2i64_with_folded_offset:
2059; CHECK:         .functype load_v2i64_with_folded_offset (i32) -> (v128)
2060; CHECK-NEXT:  # %bb.0:
2061; CHECK-NEXT:    local.get 0
2062; CHECK-NEXT:    v128.load 16
2063; CHECK-NEXT:    # fallthrough-return
2064  %q = ptrtoint <2 x i64>* %p to i32
2065  %r = add nuw i32 %q, 16
2066  %s = inttoptr i32 %r to <2 x i64>*
2067  %v = load <2 x i64>, <2 x i64>* %s
2068  ret <2 x i64> %v
2069}
2070
2071define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
2072; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
2073; CHECK:         .functype load_splat_v2i64_with_folded_offset (i32) -> (v128)
2074; CHECK-NEXT:  # %bb.0:
2075; CHECK-NEXT:    local.get 0
2076; CHECK-NEXT:    v128.load64_splat 16
2077; CHECK-NEXT:    # fallthrough-return
2078  %q = ptrtoint i64* %p to i32
2079  %r = add nuw i32 %q, 16
2080  %s = inttoptr i32 %r to i64*
2081  %e = load i64, i64* %s
2082  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2083  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2084  ret <2 x i64> %v2
2085}
2086
2087define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
2088; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
2089; CHECK:         .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
2090; CHECK-NEXT:  # %bb.0:
2091; CHECK-NEXT:    local.get 0
2092; CHECK-NEXT:    i64x2.load32x2_s 16
2093; CHECK-NEXT:    # fallthrough-return
2094  %q = ptrtoint <2 x i32>* %p to i32
2095  %r = add nuw i32 %q, 16
2096  %s = inttoptr i32 %r to <2 x i32>*
2097  %v = load <2 x i32>, <2 x i32>* %s
2098  %v2 = sext <2 x i32> %v to <2 x i64>
2099  ret <2 x i64> %v2
2100}
2101
2102define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
2103; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
2104; CHECK:         .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
2105; CHECK-NEXT:  # %bb.0:
2106; CHECK-NEXT:    local.get 0
2107; CHECK-NEXT:    i64x2.load32x2_u 16
2108; CHECK-NEXT:    # fallthrough-return
2109  %q = ptrtoint <2 x i32>* %p to i32
2110  %r = add nuw i32 %q, 16
2111  %s = inttoptr i32 %r to <2 x i32>*
2112  %v = load <2 x i32>, <2 x i32>* %s
2113  %v2 = zext <2 x i32> %v to <2 x i64>
2114  ret <2 x i64> %v2
2115}
2116
2117define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
2118; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
2119; CHECK:         .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
2120; CHECK-NEXT:  # %bb.0:
2121; CHECK-NEXT:    local.get 0
2122; CHECK-NEXT:    i64x2.load32x2_u 16
2123; CHECK-NEXT:    # fallthrough-return
2124  %q = ptrtoint <2 x i32>* %p to i32
2125  %r = add nuw i32 %q, 16
2126  %s = inttoptr i32 %r to <2 x i32>*
2127  %v = load <2 x i32>, <2 x i32>* %s
2128  ret <2 x i32> %v
2129}
2130
2131define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
2132; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
2133; CHECK:         .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
2134; CHECK-NEXT:  # %bb.0:
2135; CHECK-NEXT:    local.get 0
2136; CHECK-NEXT:    v128.load 16
2137; CHECK-NEXT:    # fallthrough-return
2138  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
2139  %v = load <2 x i64>, <2 x i64>* %s
2140  ret <2 x i64> %v
2141}
2142
2143define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
2144; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
2145; CHECK:         .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
2146; CHECK-NEXT:  # %bb.0:
2147; CHECK-NEXT:    local.get 0
2148; CHECK-NEXT:    v128.load64_splat 8
2149; CHECK-NEXT:    # fallthrough-return
2150  %s = getelementptr inbounds i64, i64* %p, i32 1
2151  %e = load i64, i64* %s
2152  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2153  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2154  ret <2 x i64> %v2
2155}
2156
2157define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
2158; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
2159; CHECK:         .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
2160; CHECK-NEXT:  # %bb.0:
2161; CHECK-NEXT:    local.get 0
2162; CHECK-NEXT:    i64x2.load32x2_s 8
2163; CHECK-NEXT:    # fallthrough-return
2164  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
2165  %v = load <2 x i32>, <2 x i32>* %s
2166  %v2 = sext <2 x i32> %v to <2 x i64>
2167  ret <2 x i64> %v2
2168}
2169
2170define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
2171; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
2172; CHECK:         .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
2173; CHECK-NEXT:  # %bb.0:
2174; CHECK-NEXT:    local.get 0
2175; CHECK-NEXT:    i64x2.load32x2_u 8
2176; CHECK-NEXT:    # fallthrough-return
2177  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
2178  %v = load <2 x i32>, <2 x i32>* %s
2179  %v2 = zext <2 x i32> %v to <2 x i64>
2180  ret <2 x i64> %v2
2181}
2182
2183define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
2184; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
2185; CHECK:         .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
2186; CHECK-NEXT:  # %bb.0:
2187; CHECK-NEXT:    local.get 0
2188; CHECK-NEXT:    i64x2.load32x2_u 8
2189; CHECK-NEXT:    # fallthrough-return
2190  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
2191  %v = load <2 x i32>, <2 x i32>* %s
2192  ret <2 x i32> %v
2193}
2194
2195define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
2196; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
2197; CHECK:         .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2198; CHECK-NEXT:  # %bb.0:
2199; CHECK-NEXT:    local.get 0
2200; CHECK-NEXT:    i32.const -16
2201; CHECK-NEXT:    i32.add
2202; CHECK-NEXT:    v128.load 0
2203; CHECK-NEXT:    # fallthrough-return
2204  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
2205  %v = load <2 x i64>, <2 x i64>* %s
2206  ret <2 x i64> %v
2207}
2208
2209define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
2210; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
2211; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2212; CHECK-NEXT:  # %bb.0:
2213; CHECK-NEXT:    local.get 0
2214; CHECK-NEXT:    i32.const -8
2215; CHECK-NEXT:    i32.add
2216; CHECK-NEXT:    v128.load64_splat 0
2217; CHECK-NEXT:    # fallthrough-return
2218  %s = getelementptr inbounds i64, i64* %p, i32 -1
2219  %e = load i64, i64* %s
2220  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2221  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2222  ret <2 x i64> %v2
2223}
2224
2225define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
2226; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
2227; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2228; CHECK-NEXT:  # %bb.0:
2229; CHECK-NEXT:    local.get 0
2230; CHECK-NEXT:    i32.const -8
2231; CHECK-NEXT:    i32.add
2232; CHECK-NEXT:    i64x2.load32x2_s 0
2233; CHECK-NEXT:    # fallthrough-return
2234  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
2235  %v = load <2 x i32>, <2 x i32>* %s
2236  %v2 = sext <2 x i32> %v to <2 x i64>
2237  ret <2 x i64> %v2
2238}
2239
2240define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
2241; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
2242; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2243; CHECK-NEXT:  # %bb.0:
2244; CHECK-NEXT:    local.get 0
2245; CHECK-NEXT:    i32.const -8
2246; CHECK-NEXT:    i32.add
2247; CHECK-NEXT:    i64x2.load32x2_u 0
2248; CHECK-NEXT:    # fallthrough-return
2249  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
2250  %v = load <2 x i32>, <2 x i32>* %s
2251  %v2 = zext <2 x i32> %v to <2 x i64>
2252  ret <2 x i64> %v2
2253}
2254
2255define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
2256; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
2257; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2258; CHECK-NEXT:  # %bb.0:
2259; CHECK-NEXT:    local.get 0
2260; CHECK-NEXT:    i32.const -8
2261; CHECK-NEXT:    i32.add
2262; CHECK-NEXT:    i64x2.load32x2_u 0
2263; CHECK-NEXT:    # fallthrough-return
2264  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
2265  %v = load <2 x i32>, <2 x i32>* %s
2266  ret <2 x i32> %v
2267}
2268
2269define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
2270; CHECK-LABEL: load_v2i64_with_unfolded_offset:
2271; CHECK:         .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
2272; CHECK-NEXT:  # %bb.0:
2273; CHECK-NEXT:    local.get 0
2274; CHECK-NEXT:    i32.const 16
2275; CHECK-NEXT:    i32.add
2276; CHECK-NEXT:    v128.load 0
2277; CHECK-NEXT:    # fallthrough-return
2278  %q = ptrtoint <2 x i64>* %p to i32
2279  %r = add nsw i32 %q, 16
2280  %s = inttoptr i32 %r to <2 x i64>*
2281  %v = load <2 x i64>, <2 x i64>* %s
2282  ret <2 x i64> %v
2283}
2284
2285define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
2286; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
2287; CHECK:         .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
2288; CHECK-NEXT:  # %bb.0:
2289; CHECK-NEXT:    local.get 0
2290; CHECK-NEXT:    i32.const 16
2291; CHECK-NEXT:    i32.add
2292; CHECK-NEXT:    v128.load64_splat 0
2293; CHECK-NEXT:    # fallthrough-return
2294  %q = ptrtoint i64* %p to i32
2295  %r = add nsw i32 %q, 16
2296  %s = inttoptr i32 %r to i64*
2297  %e = load i64, i64* %s
2298  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2299  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2300  ret <2 x i64> %v2
2301}
2302
2303define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
2304; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
2305; CHECK:         .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
2306; CHECK-NEXT:  # %bb.0:
2307; CHECK-NEXT:    local.get 0
2308; CHECK-NEXT:    i32.const 16
2309; CHECK-NEXT:    i32.add
2310; CHECK-NEXT:    i64x2.load32x2_s 0
2311; CHECK-NEXT:    # fallthrough-return
2312  %q = ptrtoint <2 x i32>* %p to i32
2313  %r = add nsw i32 %q, 16
2314  %s = inttoptr i32 %r to <2 x i32>*
2315  %v = load <2 x i32>, <2 x i32>* %s
2316  %v2 = sext <2 x i32> %v to <2 x i64>
2317  ret <2 x i64> %v2
2318}
2319
2320define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
2321; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
2322; CHECK:         .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
2323; CHECK-NEXT:  # %bb.0:
2324; CHECK-NEXT:    local.get 0
2325; CHECK-NEXT:    i32.const 16
2326; CHECK-NEXT:    i32.add
2327; CHECK-NEXT:    i64x2.load32x2_u 0
2328; CHECK-NEXT:    # fallthrough-return
2329  %q = ptrtoint <2 x i32>* %p to i32
2330  %r = add nsw i32 %q, 16
2331  %s = inttoptr i32 %r to <2 x i32>*
2332  %v = load <2 x i32>, <2 x i32>* %s
2333  %v2 = zext <2 x i32> %v to <2 x i64>
2334  ret <2 x i64> %v2
2335}
2336
2337define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
2338; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
2339; CHECK:         .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
2340; CHECK-NEXT:  # %bb.0:
2341; CHECK-NEXT:    local.get 0
2342; CHECK-NEXT:    i32.const 16
2343; CHECK-NEXT:    i32.add
2344; CHECK-NEXT:    i64x2.load32x2_u 0
2345; CHECK-NEXT:    # fallthrough-return
2346  %q = ptrtoint <2 x i32>* %p to i32
2347  %r = add nsw i32 %q, 16
2348  %s = inttoptr i32 %r to <2 x i32>*
2349  %v = load <2 x i32>, <2 x i32>* %s
2350  ret <2 x i32> %v
2351}
2352
2353define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
2354; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
2355; CHECK:         .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2356; CHECK-NEXT:  # %bb.0:
2357; CHECK-NEXT:    local.get 0
2358; CHECK-NEXT:    i32.const 16
2359; CHECK-NEXT:    i32.add
2360; CHECK-NEXT:    v128.load 0
2361; CHECK-NEXT:    # fallthrough-return
2362  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
2363  %v = load <2 x i64>, <2 x i64>* %s
2364  ret <2 x i64> %v
2365}
2366
2367define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
2368; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
2369; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2370; CHECK-NEXT:  # %bb.0:
2371; CHECK-NEXT:    local.get 0
2372; CHECK-NEXT:    i32.const 8
2373; CHECK-NEXT:    i32.add
2374; CHECK-NEXT:    v128.load64_splat 0
2375; CHECK-NEXT:    # fallthrough-return
2376  %s = getelementptr i64, i64* %p, i32 1
2377  %e = load i64, i64* %s
2378  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2379  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2380  ret <2 x i64> %v2
2381}
2382
2383define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
2384; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
2385; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2386; CHECK-NEXT:  # %bb.0:
2387; CHECK-NEXT:    local.get 0
2388; CHECK-NEXT:    i32.const 8
2389; CHECK-NEXT:    i32.add
2390; CHECK-NEXT:    i64x2.load32x2_s 0
2391; CHECK-NEXT:    # fallthrough-return
2392  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
2393  %v = load <2 x i32>, <2 x i32>* %s
2394  %v2 = sext <2 x i32> %v to <2 x i64>
2395  ret <2 x i64> %v2
2396}
2397
2398define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
2399; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
2400; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2401; CHECK-NEXT:  # %bb.0:
2402; CHECK-NEXT:    local.get 0
2403; CHECK-NEXT:    i32.const 8
2404; CHECK-NEXT:    i32.add
2405; CHECK-NEXT:    i64x2.load32x2_u 0
2406; CHECK-NEXT:    # fallthrough-return
2407  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
2408  %v = load <2 x i32>, <2 x i32>* %s
2409  %v2 = zext <2 x i32> %v to <2 x i64>
2410  ret <2 x i64> %v2
2411}
2412
; Without "inbounds" the gep offset is not known to be non-negative, so it is
; not folded into the load's offset immediate; an explicit i32.add is emitted.
; The bare <2 x i32> load is selected as a zero-extending 32x2 load.
define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}

; A load from a constant address uses a zero base and folds the address into
; the offset immediate.
define <2 x i64> @load_v2i64_from_numeric_address() {
; CHECK-LABEL: load_v2i64_from_numeric_address:
; CHECK:         .functype load_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}

; Splat of a scalar i64 loaded from a constant address.
define <2 x i64> @load_splat_v2i64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
; CHECK:         .functype load_splat_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

; Sign-extending 32x2 load from a constant address.
define <2 x i64> @load_sext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
; CHECK:         .functype load_sext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

; Zero-extending 32x2 load from a constant address.
define <2 x i64> @load_zext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
; CHECK:         .functype load_zext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

; A bare <2 x i32> load from a constant address is also selected as a
; zero-extending 32x2 load.
define <2 x i32> @load_ext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
; CHECK:         .functype load_ext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2490
@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
; A load from a global's address folds the symbol into the offset immediate.
define <2 x i64> @load_v2i64_from_global_address() {
; CHECK-LABEL: load_v2i64_from_global_address:
; CHECK:         .functype load_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i64>, <2 x i64>* @gv_v2i64
  ret <2 x i64> %v
}

@gv_i64 = global i64 42
; Splat of a scalar i64 loaded from a global's address.
define <2 x i64> @load_splat_v2i64_from_global_address() {
; CHECK-LABEL: load_splat_v2i64_from_global_address:
; CHECK:         .functype load_splat_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat gv_i64
; CHECK-NEXT:    # fallthrough-return
  %e = load i64, i64* @gv_i64
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}

@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
; Sign-extending 32x2 load from a global's address.
define <2 x i64> @load_sext_v2i64_from_global_address() {
; CHECK-LABEL: load_sext_v2i64_from_global_address:
; CHECK:         .functype load_sext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_s gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

; Zero-extending 32x2 load from a global's address.
define <2 x i64> @load_zext_v2i64_from_global_address() {
; CHECK-LABEL: load_zext_v2i64_from_global_address:
; CHECK:         .functype load_zext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}

; A bare <2 x i32> load from a global's address is selected as a
; zero-extending 32x2 load.
define <2 x i32> @load_ext_v2i64_from_global_address() {
; CHECK-LABEL: load_ext_v2i64_from_global_address:
; CHECK:         .functype load_ext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  ret <2 x i32> %v
}
2552
; Basic v128 store through a pointer argument.
define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64:
; CHECK:         .functype store_v2i64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* %p
  ret void
}

; An "add nuw" of a constant is folded into the store's offset immediate.
define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_offset:
; CHECK:         .functype store_v2i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; An "inbounds" gep with a positive index is folded into the offset immediate.
define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; CHECK:         .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; Negative offsets cannot be encoded in the unsigned offset immediate, so an
; explicit i32.add is emitted.
define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; "add nsw" (without nuw) does not prove the address does not wrap, so the
; offset is not folded.
define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; CHECK:         .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; A gep without "inbounds" likewise does not get its offset folded.
define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; A store to a constant address folds the address into the offset immediate.
define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_numeric_address:
; CHECK:         .functype store_v2i64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}

; A store to a global's address folds the symbol into the offset immediate.
define void @store_v2i64_to_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_global_address:
; CHECK:         .functype store_v2i64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* @gv_v2i64
  ret void
}
2664
2665; ==============================================================================
2666; 4 x float
2667; ==============================================================================
2668define <4 x float> @load_v4f32(<4 x float>* %p) {
2669; CHECK-LABEL: load_v4f32:
2670; CHECK:         .functype load_v4f32 (i32) -> (v128)
2671; CHECK-NEXT:  # %bb.0:
2672; CHECK-NEXT:    local.get 0
2673; CHECK-NEXT:    v128.load 0
2674; CHECK-NEXT:    # fallthrough-return
2675  %v = load <4 x float>, <4 x float>* %p
2676  ret <4 x float> %v
2677}
2678
2679define <4 x float> @load_splat_v4f32(float* %p) {
2680; CHECK-LABEL: load_splat_v4f32:
2681; CHECK:         .functype load_splat_v4f32 (i32) -> (v128)
2682; CHECK-NEXT:  # %bb.0:
2683; CHECK-NEXT:    local.get 0
2684; CHECK-NEXT:    v128.load32_splat 0
2685; CHECK-NEXT:    # fallthrough-return
2686  %e = load float, float* %p
2687  %v1 = insertelement <4 x float> undef, float %e, i32 0
2688  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2689  ret <4 x float> %v2
2690}
2691
2692define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
2693; CHECK-LABEL: load_v4f32_with_folded_offset:
2694; CHECK:         .functype load_v4f32_with_folded_offset (i32) -> (v128)
2695; CHECK-NEXT:  # %bb.0:
2696; CHECK-NEXT:    local.get 0
2697; CHECK-NEXT:    v128.load 16
2698; CHECK-NEXT:    # fallthrough-return
2699  %q = ptrtoint <4 x float>* %p to i32
2700  %r = add nuw i32 %q, 16
2701  %s = inttoptr i32 %r to <4 x float>*
2702  %v = load <4 x float>, <4 x float>* %s
2703  ret <4 x float> %v
2704}
2705
2706define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
2707; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
2708; CHECK:         .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
2709; CHECK-NEXT:  # %bb.0:
2710; CHECK-NEXT:    local.get 0
2711; CHECK-NEXT:    v128.load32_splat 16
2712; CHECK-NEXT:    # fallthrough-return
2713  %q = ptrtoint float* %p to i32
2714  %r = add nuw i32 %q, 16
2715  %s = inttoptr i32 %r to float*
2716  %e = load float, float* %s
2717  %v1 = insertelement <4 x float> undef, float %e, i32 0
2718  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2719  ret <4 x float> %v2
2720}
2721
2722define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
2723; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
2724; CHECK:         .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
2725; CHECK-NEXT:  # %bb.0:
2726; CHECK-NEXT:    local.get 0
2727; CHECK-NEXT:    v128.load 16
2728; CHECK-NEXT:    # fallthrough-return
2729  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
2730  %v = load <4 x float>, <4 x float>* %s
2731  ret <4 x float> %v
2732}
2733
2734define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
2735; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
2736; CHECK:         .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
2737; CHECK-NEXT:  # %bb.0:
2738; CHECK-NEXT:    local.get 0
2739; CHECK-NEXT:    v128.load32_splat 4
2740; CHECK-NEXT:    # fallthrough-return
2741  %s = getelementptr inbounds float, float* %p, i32 1
2742  %e = load float, float* %s
2743  %v1 = insertelement <4 x float> undef, float %e, i32 0
2744  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2745  ret <4 x float> %v2
2746}
2747
; Negative offsets cannot be encoded in the unsigned offset immediate, so an
; explicit i32.add is emitted.
define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

; Same for the splat load, with the 4-byte scalar stride.
define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 -1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; "add nsw" (without nuw) does not prove the address does not wrap, so the
; offset is not folded.
define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK:         .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

; Same for the splat load.
define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

; A gep without "inbounds" likewise does not get its offset folded.
define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

; Same for the splat load.
define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2841
; A load from a constant address folds the address into the offset immediate.
define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK:         .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}

; Splat of a scalar float loaded from a constant address.
define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK:         .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}

@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
; A load from a global's address folds the symbol into the offset immediate.
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK:         .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %v
}

@gv_f32 = global float 42.
; Splat of a scalar float loaded from a global's address.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK:         .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat gv_f32
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* @gv_f32
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2893
; Basic v128 store through a pointer argument.
define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK:         .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* %p
  ret void
}

; An "add nuw" of a constant is folded into the store's offset immediate.
define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK:         .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; An "inbounds" gep with a positive index is folded into the offset immediate.
define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; Negative offsets cannot be encoded in the unsigned offset immediate, so an
; explicit i32.add is emitted.
define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; "add nsw" (without nuw) does not prove the address does not wrap, so the
; offset is not folded.
define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; A gep without "inbounds" likewise does not get its offset folded.
define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; A store to a constant address folds the address into the offset immediate.
define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}

; A store to a global's address folds the symbol into the offset immediate.
define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* @gv_v4f32
  ret void
}
3005
3006; ==============================================================================
3007; 2 x double
3008; ==============================================================================
3009define <2 x double> @load_v2f64(<2 x double>* %p) {
3010; CHECK-LABEL: load_v2f64:
3011; CHECK:         .functype load_v2f64 (i32) -> (v128)
3012; CHECK-NEXT:  # %bb.0:
3013; CHECK-NEXT:    local.get 0
3014; CHECK-NEXT:    v128.load 0
3015; CHECK-NEXT:    # fallthrough-return
3016  %v = load <2 x double>, <2 x double>* %p
3017  ret <2 x double> %v
3018}
3019
3020define <2 x double> @load_splat_v2f64(double* %p) {
3021; CHECK-LABEL: load_splat_v2f64:
3022; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
3023; CHECK-NEXT:  # %bb.0:
3024; CHECK-NEXT:    local.get 0
3025; CHECK-NEXT:    v128.load64_splat 0
3026; CHECK-NEXT:    # fallthrough-return
3027  %e = load double, double* %p
3028  %v1 = insertelement <2 x double> undef, double %e, i32 0
3029  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3030  ret <2 x double> %v2
3031}
3032
3033define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
3034; CHECK-LABEL: load_v2f64_with_folded_offset:
3035; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
3036; CHECK-NEXT:  # %bb.0:
3037; CHECK-NEXT:    local.get 0
3038; CHECK-NEXT:    v128.load 16
3039; CHECK-NEXT:    # fallthrough-return
3040  %q = ptrtoint <2 x double>* %p to i32
3041  %r = add nuw i32 %q, 16
3042  %s = inttoptr i32 %r to <2 x double>*
3043  %v = load <2 x double>, <2 x double>* %s
3044  ret <2 x double> %v
3045}
3046
3047define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
3048; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
3049; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
3050; CHECK-NEXT:  # %bb.0:
3051; CHECK-NEXT:    local.get 0
3052; CHECK-NEXT:    v128.load64_splat 16
3053; CHECK-NEXT:    # fallthrough-return
3054  %q = ptrtoint double* %p to i32
3055  %r = add nuw i32 %q, 16
3056  %s = inttoptr i32 %r to double*
3057  %e = load double, double* %s
3058  %v1 = insertelement <2 x double> undef, double %e, i32 0
3059  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3060  ret <2 x double> %v2
3061}
3062
3063define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
3064; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
3065; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
3066; CHECK-NEXT:  # %bb.0:
3067; CHECK-NEXT:    local.get 0
3068; CHECK-NEXT:    v128.load 16
3069; CHECK-NEXT:    # fallthrough-return
3070  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
3071  %v = load <2 x double>, <2 x double>* %s
3072  ret <2 x double> %v
3073}
3074
3075define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
3076; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
3077; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
3078; CHECK-NEXT:  # %bb.0:
3079; CHECK-NEXT:    local.get 0
3080; CHECK-NEXT:    v128.load64_splat 8
3081; CHECK-NEXT:    # fallthrough-return
3082  %s = getelementptr inbounds double, double* %p, i32 1
3083  %e = load double, double* %s
3084  %v1 = insertelement <2 x double> undef, double %e, i32 0
3085  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3086  ret <2 x double> %v2
3087}
3088
; Negative offsets cannot be encoded in the unsigned offset immediate, so an
; explicit i32.add is emitted.
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

; Same for the splat load, with the 8-byte scalar stride.
define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 -1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; "add nsw" (without nuw) does not prove the address does not wrap, so the
; offset is not folded.
define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

; Same for the splat load.
define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; A gep without "inbounds" likewise does not get its offset folded.
define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

; Same for the splat load.
define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3182
; A load from a constant address folds the address into the offset immediate.
define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}

; Splat of a scalar double loaded from a constant address.
define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

@gv_v2f64 = global <2 x double> <double 42., double 42.>
; A load from a global's address folds the symbol into the offset immediate.
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %v
}

@gv_f64 = global double 42.
; Splat of a scalar double loaded from a global's address.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat gv_f64
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3234
; Baseline: a plain <2 x double> store through a pointer argument selects
; v128.store with a zero offset immediate.
3235define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
3236; CHECK-LABEL: store_v2f64:
3237; CHECK:         .functype store_v2f64 (v128, i32) -> ()
3238; CHECK-NEXT:  # %bb.0:
3239; CHECK-NEXT:    local.get 1
3240; CHECK-NEXT:    local.get 0
3241; CHECK-NEXT:    v128.store 0
3242; CHECK-NEXT:    # fallthrough-return
3243  store <2 x double> %v , <2 x double>* %p
3244  ret void
3245}
3246
; An address formed by `add nuw` cannot wrap the unsigned address space, so
; the +16 is safe to fold into the store's unsigned offset immediate
; (v128.store 16). Contrast with store_v2f64_with_unfolded_offset below.
3247define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
3248; CHECK-LABEL: store_v2f64_with_folded_offset:
3249; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
3250; CHECK-NEXT:  # %bb.0:
3251; CHECK-NEXT:    local.get 1
3252; CHECK-NEXT:    local.get 0
3253; CHECK-NEXT:    v128.store 16
3254; CHECK-NEXT:    # fallthrough-return
3255  %q = ptrtoint <2 x double>* %p to i32
3256  %r = add nuw i32 %q, 16
3257  %s = inttoptr i32 %r to <2 x double>*
3258  store <2 x double> %v , <2 x double>* %s
3259  ret void
3260}
3261
; An `inbounds` gep guarantees the computed address does not wrap, so the
; element offset (1 * 16 bytes) folds into the store's offset immediate.
3262define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
3263; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
3264; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
3265; CHECK-NEXT:  # %bb.0:
3266; CHECK-NEXT:    local.get 1
3267; CHECK-NEXT:    local.get 0
3268; CHECK-NEXT:    v128.store 16
3269; CHECK-NEXT:    # fallthrough-return
3270  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
3271  store <2 x double> %v , <2 x double>* %s
3272  ret void
3273}
3274
; WebAssembly memory offset immediates are unsigned, so a negative gep offset
; (-1 element = -16 bytes) cannot be folded even with `inbounds`; the address
; must be computed with an explicit i32.add and the store uses offset 0.
3275define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
3276; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
3277; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
3278; CHECK-NEXT:  # %bb.0:
3279; CHECK-NEXT:    local.get 1
3280; CHECK-NEXT:    i32.const -16
3281; CHECK-NEXT:    i32.add
3282; CHECK-NEXT:    local.get 0
3283; CHECK-NEXT:    v128.store 0
3284; CHECK-NEXT:    # fallthrough-return
3285  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
3286  store <2 x double> %v , <2 x double>* %s
3287  ret void
3288}
3289
; `add nsw` only rules out *signed* wrap, which is not enough to prove the
; unsigned address computation stays in bounds, so the +16 is NOT folded into
; the offset immediate; an explicit i32.add computes the address instead.
3290define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
3291; CHECK-LABEL: store_v2f64_with_unfolded_offset:
3292; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
3293; CHECK-NEXT:  # %bb.0:
3294; CHECK-NEXT:    local.get 1
3295; CHECK-NEXT:    i32.const 16
3296; CHECK-NEXT:    i32.add
3297; CHECK-NEXT:    local.get 0
3298; CHECK-NEXT:    v128.store 0
3299; CHECK-NEXT:    # fallthrough-return
3300  %q = ptrtoint <2 x double>* %p to i32
3301  %r = add nsw i32 %q, 16
3302  %s = inttoptr i32 %r to <2 x double>*
3303  store <2 x double> %v , <2 x double>* %s
3304  ret void
3305}
3306
; A gep without `inbounds` may wrap, so the +16 byte offset is NOT folded into
; the offset immediate; the address is computed with an explicit i32.add.
3307define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
3308; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
3309; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
3310; CHECK-NEXT:  # %bb.0:
3311; CHECK-NEXT:    local.get 1
3312; CHECK-NEXT:    i32.const 16
3313; CHECK-NEXT:    i32.add
3314; CHECK-NEXT:    local.get 0
3315; CHECK-NEXT:    v128.store 0
3316; CHECK-NEXT:    # fallthrough-return
3317  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
3318  store <2 x double> %v , <2 x double>* %s
3319  ret void
3320}
3321
; A store to a constant address folds the address (32) into the unsigned
; offset immediate, with i32.const 0 as the base.
3322define void @store_v2f64_to_numeric_address(<2 x double> %v) {
3323; CHECK-LABEL: store_v2f64_to_numeric_address:
3324; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
3325; CHECK-NEXT:  # %bb.0:
3326; CHECK-NEXT:    i32.const 0
3327; CHECK-NEXT:    local.get 0
3328; CHECK-NEXT:    v128.store 32
3329; CHECK-NEXT:    # fallthrough-return
3330  %s = inttoptr i32 32 to <2 x double>*
3331  store <2 x double> %v , <2 x double>* %s
3332  ret void
3333}
3334
; A store directly to a global folds the global's symbol into the store's
; offset immediate (v128.store gv_v2f64) with a zero base address.
3335define void @store_v2f64_to_global_address(<2 x double> %v) {
3336; CHECK-LABEL: store_v2f64_to_global_address:
3337; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
3338; CHECK-NEXT:  # %bb.0:
3339; CHECK-NEXT:    i32.const 0
3340; CHECK-NEXT:    local.get 0
3341; CHECK-NEXT:    v128.store gv_v2f64
3342; CHECK-NEXT:    # fallthrough-return
3343  store <2 x double> %v , <2 x double>* @gv_v2f64
3344  ret void
3345}
3346