1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
3
4; Test SIMD loads and stores
5
6target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
7target triple = "wasm32-unknown-unknown"
8
9; ==============================================================================
10; 16 x i8
11; ==============================================================================
12define <16 x i8> @load_v16i8(<16 x i8>* %p) {
13; CHECK-LABEL: load_v16i8:
14; CHECK:         .functype load_v16i8 (i32) -> (v128)
15; CHECK-NEXT:  # %bb.0:
16; CHECK-NEXT:    local.get 0
17; CHECK-NEXT:    v128.load 0
18; CHECK-NEXT:    # fallthrough-return
19  %v = load <16 x i8>, <16 x i8>* %p
20  ret <16 x i8> %v
21}
22
23define <16 x i8> @load_splat_v16i8(i8* %p) {
24; CHECK-LABEL: load_splat_v16i8:
25; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
26; CHECK-NEXT:  # %bb.0:
27; CHECK-NEXT:    local.get 0
28; CHECK-NEXT:    v8x16.load_splat 0
29; CHECK-NEXT:    # fallthrough-return
30  %e = load i8, i8* %p
31  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
32  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
33  ret <16 x i8> %v2
34}
35
36define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
37; CHECK-LABEL: load_v16i8_with_folded_offset:
38; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
39; CHECK-NEXT:  # %bb.0:
40; CHECK-NEXT:    local.get 0
41; CHECK-NEXT:    v128.load 16
42; CHECK-NEXT:    # fallthrough-return
43  %q = ptrtoint <16 x i8>* %p to i32
44  %r = add nuw i32 %q, 16
45  %s = inttoptr i32 %r to <16 x i8>*
46  %v = load <16 x i8>, <16 x i8>* %s
47  ret <16 x i8> %v
48}
49
50define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
51; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
52; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
53; CHECK-NEXT:  # %bb.0:
54; CHECK-NEXT:    local.get 0
55; CHECK-NEXT:    v8x16.load_splat 16
56; CHECK-NEXT:    # fallthrough-return
57  %q = ptrtoint i8* %p to i32
58  %r = add nuw i32 %q, 16
59  %s = inttoptr i32 %r to i8*
60  %e = load i8, i8* %s
61  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
62  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
63  ret <16 x i8> %v2
64}
65
66define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
67; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
68; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
69; CHECK-NEXT:  # %bb.0:
70; CHECK-NEXT:    local.get 0
71; CHECK-NEXT:    v128.load 16
72; CHECK-NEXT:    # fallthrough-return
73  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
74  %v = load <16 x i8>, <16 x i8>* %s
75  ret <16 x i8> %v
76}
77
78define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
79; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
80; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
81; CHECK-NEXT:  # %bb.0:
82; CHECK-NEXT:    local.get 0
83; CHECK-NEXT:    v8x16.load_splat 1
84; CHECK-NEXT:    # fallthrough-return
85  %s = getelementptr inbounds i8, i8* %p, i32 1
86  %e = load i8, i8* %s
87  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
88  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
89  ret <16 x i8> %v2
90}
91
92define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
93; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
94; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
95; CHECK-NEXT:  # %bb.0:
96; CHECK-NEXT:    local.get 0
97; CHECK-NEXT:    i32.const -16
98; CHECK-NEXT:    i32.add
99; CHECK-NEXT:    v128.load 0
100; CHECK-NEXT:    # fallthrough-return
101  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
102  %v = load <16 x i8>, <16 x i8>* %s
103  ret <16 x i8> %v
104}
105
106define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
107; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
108; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
109; CHECK-NEXT:  # %bb.0:
110; CHECK-NEXT:    local.get 0
111; CHECK-NEXT:    i32.const -1
112; CHECK-NEXT:    i32.add
113; CHECK-NEXT:    v8x16.load_splat 0
114; CHECK-NEXT:    # fallthrough-return
115  %s = getelementptr inbounds i8, i8* %p, i32 -1
116  %e = load i8, i8* %s
117  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
118  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
119  ret <16 x i8> %v2
120}
121
122define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
123; CHECK-LABEL: load_v16i8_with_unfolded_offset:
124; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
125; CHECK-NEXT:  # %bb.0:
126; CHECK-NEXT:    local.get 0
127; CHECK-NEXT:    i32.const 16
128; CHECK-NEXT:    i32.add
129; CHECK-NEXT:    v128.load 0
130; CHECK-NEXT:    # fallthrough-return
131  %q = ptrtoint <16 x i8>* %p to i32
132  %r = add nsw i32 %q, 16
133  %s = inttoptr i32 %r to <16 x i8>*
134  %v = load <16 x i8>, <16 x i8>* %s
135  ret <16 x i8> %v
136}
137
138define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
139; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
140; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
141; CHECK-NEXT:  # %bb.0:
142; CHECK-NEXT:    local.get 0
143; CHECK-NEXT:    i32.const 16
144; CHECK-NEXT:    i32.add
145; CHECK-NEXT:    v8x16.load_splat 0
146; CHECK-NEXT:    # fallthrough-return
147  %q = ptrtoint i8* %p to i32
148  %r = add nsw i32 %q, 16
149  %s = inttoptr i32 %r to i8*
150  %e = load i8, i8* %s
151  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
152  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
153  ret <16 x i8> %v2
154}
155
156define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
157; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
158; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
159; CHECK-NEXT:  # %bb.0:
160; CHECK-NEXT:    local.get 0
161; CHECK-NEXT:    i32.const 16
162; CHECK-NEXT:    i32.add
163; CHECK-NEXT:    v128.load 0
164; CHECK-NEXT:    # fallthrough-return
165  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
166  %v = load <16 x i8>, <16 x i8>* %s
167  ret <16 x i8> %v
168}
169
170define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
171; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
172; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
173; CHECK-NEXT:  # %bb.0:
174; CHECK-NEXT:    local.get 0
175; CHECK-NEXT:    i32.const 1
176; CHECK-NEXT:    i32.add
177; CHECK-NEXT:    v8x16.load_splat 0
178; CHECK-NEXT:    # fallthrough-return
179  %s = getelementptr i8, i8* %p, i32 1
180  %e = load i8, i8* %s
181  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
182  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
183  ret <16 x i8> %v2
184}
185
186define <16 x i8> @load_v16i8_from_numeric_address() {
187; CHECK-LABEL: load_v16i8_from_numeric_address:
188; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
189; CHECK-NEXT:  # %bb.0:
190; CHECK-NEXT:    i32.const 0
191; CHECK-NEXT:    v128.load 32
192; CHECK-NEXT:    # fallthrough-return
193  %s = inttoptr i32 32 to <16 x i8>*
194  %v = load <16 x i8>, <16 x i8>* %s
195  ret <16 x i8> %v
196}
197
198define <16 x i8> @load_splat_v16i8_from_numeric_address() {
199; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
200; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
201; CHECK-NEXT:  # %bb.0:
202; CHECK-NEXT:    i32.const 0
203; CHECK-NEXT:    v8x16.load_splat 32
204; CHECK-NEXT:    # fallthrough-return
205  %s = inttoptr i32 32 to i8*
206  %e = load i8, i8* %s
207  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
208  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
209  ret <16 x i8> %v2
210}
211
212@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
213define <16 x i8> @load_v16i8_from_global_address() {
214; CHECK-LABEL: load_v16i8_from_global_address:
215; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
216; CHECK-NEXT:  # %bb.0:
217; CHECK-NEXT:    i32.const 0
218; CHECK-NEXT:    v128.load gv_v16i8
219; CHECK-NEXT:    # fallthrough-return
220  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
221  ret <16 x i8> %v
222}
223
224@gv_i8 = global i8 42
225define <16 x i8> @load_splat_v16i8_from_global_address() {
226; CHECK-LABEL: load_splat_v16i8_from_global_address:
227; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
228; CHECK-NEXT:  # %bb.0:
229; CHECK-NEXT:    i32.const 0
230; CHECK-NEXT:    v8x16.load_splat gv_i8
231; CHECK-NEXT:    # fallthrough-return
232  %e = load i8, i8* @gv_i8
233  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
234  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
235  ret <16 x i8> %v2
236}
237
238define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
239; CHECK-LABEL: store_v16i8:
240; CHECK:         .functype store_v16i8 (v128, i32) -> ()
241; CHECK-NEXT:  # %bb.0:
242; CHECK-NEXT:    local.get 1
243; CHECK-NEXT:    local.get 0
244; CHECK-NEXT:    v128.store 0
245; CHECK-NEXT:    # fallthrough-return
246  store <16 x i8> %v , <16 x i8>* %p
247  ret void
248}
249
250define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
251; CHECK-LABEL: store_v16i8_with_folded_offset:
252; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
253; CHECK-NEXT:  # %bb.0:
254; CHECK-NEXT:    local.get 1
255; CHECK-NEXT:    local.get 0
256; CHECK-NEXT:    v128.store 16
257; CHECK-NEXT:    # fallthrough-return
258  %q = ptrtoint <16 x i8>* %p to i32
259  %r = add nuw i32 %q, 16
260  %s = inttoptr i32 %r to <16 x i8>*
261  store <16 x i8> %v , <16 x i8>* %s
262  ret void
263}
264
265define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
266; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
267; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
268; CHECK-NEXT:  # %bb.0:
269; CHECK-NEXT:    local.get 1
270; CHECK-NEXT:    local.get 0
271; CHECK-NEXT:    v128.store 16
272; CHECK-NEXT:    # fallthrough-return
273  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
274  store <16 x i8> %v , <16 x i8>* %s
275  ret void
276}
277
278define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
279; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
280; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
281; CHECK-NEXT:  # %bb.0:
282; CHECK-NEXT:    local.get 1
283; CHECK-NEXT:    i32.const -16
284; CHECK-NEXT:    i32.add
285; CHECK-NEXT:    local.get 0
286; CHECK-NEXT:    v128.store 0
287; CHECK-NEXT:    # fallthrough-return
288  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
289  store <16 x i8> %v , <16 x i8>* %s
290  ret void
291}
292
293define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
294; CHECK-LABEL: store_v16i8_with_unfolded_offset:
295; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
296; CHECK-NEXT:  # %bb.0:
297; CHECK-NEXT:    local.get 1
298; CHECK-NEXT:    i32.const 16
299; CHECK-NEXT:    i32.add
300; CHECK-NEXT:    local.get 0
301; CHECK-NEXT:    v128.store 0
302; CHECK-NEXT:    # fallthrough-return
303  %q = ptrtoint <16 x i8>* %p to i32
304  %r = add nsw i32 %q, 16
305  %s = inttoptr i32 %r to <16 x i8>*
306  store <16 x i8> %v , <16 x i8>* %s
307  ret void
308}
309
310define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
311; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
312; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
313; CHECK-NEXT:  # %bb.0:
314; CHECK-NEXT:    local.get 1
315; CHECK-NEXT:    i32.const 16
316; CHECK-NEXT:    i32.add
317; CHECK-NEXT:    local.get 0
318; CHECK-NEXT:    v128.store 0
319; CHECK-NEXT:    # fallthrough-return
320  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
321  store <16 x i8> %v , <16 x i8>* %s
322  ret void
323}
324
325define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
326; CHECK-LABEL: store_v16i8_to_numeric_address:
327; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
328; CHECK-NEXT:  # %bb.0:
329; CHECK-NEXT:    i32.const 0
330; CHECK-NEXT:    local.get 0
331; CHECK-NEXT:    v128.store 32
332; CHECK-NEXT:    # fallthrough-return
333  %s = inttoptr i32 32 to <16 x i8>*
334  store <16 x i8> %v , <16 x i8>* %s
335  ret void
336}
337
338define void @store_v16i8_to_global_address(<16 x i8> %v) {
339; CHECK-LABEL: store_v16i8_to_global_address:
340; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
341; CHECK-NEXT:  # %bb.0:
342; CHECK-NEXT:    i32.const 0
343; CHECK-NEXT:    local.get 0
344; CHECK-NEXT:    v128.store gv_v16i8
345; CHECK-NEXT:    # fallthrough-return
346  store <16 x i8> %v , <16 x i8>* @gv_v16i8
347  ret void
348}
349
350; ==============================================================================
351; 8 x i16
352; ==============================================================================
353define <8 x i16> @load_v8i16(<8 x i16>* %p) {
354; CHECK-LABEL: load_v8i16:
355; CHECK:         .functype load_v8i16 (i32) -> (v128)
356; CHECK-NEXT:  # %bb.0:
357; CHECK-NEXT:    local.get 0
358; CHECK-NEXT:    v128.load 0
359; CHECK-NEXT:    # fallthrough-return
360  %v = load <8 x i16>, <8 x i16>* %p
361  ret <8 x i16> %v
362}
363
364define <8 x i16> @load_splat_v8i16(i16* %p) {
365; CHECK-LABEL: load_splat_v8i16:
366; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
367; CHECK-NEXT:  # %bb.0:
368; CHECK-NEXT:    local.get 0
369; CHECK-NEXT:    v16x8.load_splat 0
370; CHECK-NEXT:    # fallthrough-return
371  %e = load i16, i16* %p
372  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
373  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
374  ret <8 x i16> %v2
375}
376
377define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
378; CHECK-LABEL: load_sext_v8i16:
379; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
380; CHECK-NEXT:  # %bb.0:
381; CHECK-NEXT:    local.get 0
382; CHECK-NEXT:    i16x8.load8x8_s 0
383; CHECK-NEXT:    # fallthrough-return
384  %v = load <8 x i8>, <8 x i8>* %p
385  %v2 = sext <8 x i8> %v to <8 x i16>
386  ret <8 x i16> %v2
387}
388
389define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
390; CHECK-LABEL: load_zext_v8i16:
391; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
392; CHECK-NEXT:  # %bb.0:
393; CHECK-NEXT:    local.get 0
394; CHECK-NEXT:    i16x8.load8x8_u 0
395; CHECK-NEXT:    # fallthrough-return
396  %v = load <8 x i8>, <8 x i8>* %p
397  %v2 = zext <8 x i8> %v to <8 x i16>
398  ret <8 x i16> %v2
399}
400
401define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
402; CHECK-LABEL: load_ext_v8i16:
403; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
404; CHECK-NEXT:  # %bb.0:
405; CHECK-NEXT:    local.get 0
406; CHECK-NEXT:    i16x8.load8x8_u 0
407; CHECK-NEXT:    # fallthrough-return
408  %v = load <8 x i8>, <8 x i8>* %p
409  ret <8 x i8> %v
410}
411
412define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
413; CHECK-LABEL: load_v8i16_with_folded_offset:
414; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
415; CHECK-NEXT:  # %bb.0:
416; CHECK-NEXT:    local.get 0
417; CHECK-NEXT:    v128.load 16
418; CHECK-NEXT:    # fallthrough-return
419  %q = ptrtoint <8 x i16>* %p to i32
420  %r = add nuw i32 %q, 16
421  %s = inttoptr i32 %r to <8 x i16>*
422  %v = load <8 x i16>, <8 x i16>* %s
423  ret <8 x i16> %v
424}
425
426define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
427; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
428; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
429; CHECK-NEXT:  # %bb.0:
430; CHECK-NEXT:    local.get 0
431; CHECK-NEXT:    v16x8.load_splat 16
432; CHECK-NEXT:    # fallthrough-return
433  %q = ptrtoint i16* %p to i32
434  %r = add nuw i32 %q, 16
435  %s = inttoptr i32 %r to i16*
436  %e = load i16, i16* %s
437  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
438  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
439  ret <8 x i16> %v2
440}
441
442define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
443; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
444; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
445; CHECK-NEXT:  # %bb.0:
446; CHECK-NEXT:    local.get 0
447; CHECK-NEXT:    i16x8.load8x8_s 16
448; CHECK-NEXT:    # fallthrough-return
449  %q = ptrtoint <8 x i8>* %p to i32
450  %r = add nuw i32 %q, 16
451  %s = inttoptr i32 %r to <8 x i8>*
452  %v = load <8 x i8>, <8 x i8>* %s
453  %v2 = sext <8 x i8> %v to <8 x i16>
454  ret <8 x i16> %v2
455}
456
457define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
458; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
459; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
460; CHECK-NEXT:  # %bb.0:
461; CHECK-NEXT:    local.get 0
462; CHECK-NEXT:    i16x8.load8x8_u 16
463; CHECK-NEXT:    # fallthrough-return
464  %q = ptrtoint <8 x i8>* %p to i32
465  %r = add nuw i32 %q, 16
466  %s = inttoptr i32 %r to <8 x i8>*
467  %v = load <8 x i8>, <8 x i8>* %s
468  %v2 = zext <8 x i8> %v to <8 x i16>
469  ret <8 x i16> %v2
470}
471
472define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
473; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
474; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
475; CHECK-NEXT:  # %bb.0:
476; CHECK-NEXT:    local.get 0
477; CHECK-NEXT:    i16x8.load8x8_u 16
478; CHECK-NEXT:    # fallthrough-return
479  %q = ptrtoint <8 x i8>* %p to i32
480  %r = add nuw i32 %q, 16
481  %s = inttoptr i32 %r to <8 x i8>*
482  %v = load <8 x i8>, <8 x i8>* %s
483  ret <8 x i8> %v
484}
485
486define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
487; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
488; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
489; CHECK-NEXT:  # %bb.0:
490; CHECK-NEXT:    local.get 0
491; CHECK-NEXT:    v128.load 16
492; CHECK-NEXT:    # fallthrough-return
493  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
494  %v = load <8 x i16>, <8 x i16>* %s
495  ret <8 x i16> %v
496}
497
498define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
499; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
500; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
501; CHECK-NEXT:  # %bb.0:
502; CHECK-NEXT:    local.get 0
503; CHECK-NEXT:    v16x8.load_splat 2
504; CHECK-NEXT:    # fallthrough-return
505  %s = getelementptr inbounds i16, i16* %p, i32 1
506  %e = load i16, i16* %s
507  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
508  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
509  ret <8 x i16> %v2
510}
511
512define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
513; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
514; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
515; CHECK-NEXT:  # %bb.0:
516; CHECK-NEXT:    local.get 0
517; CHECK-NEXT:    i16x8.load8x8_s 8
518; CHECK-NEXT:    # fallthrough-return
519  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
520  %v = load <8 x i8>, <8 x i8>* %s
521  %v2 = sext <8 x i8> %v to <8 x i16>
522  ret <8 x i16> %v2
523}
524
525define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
526; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
527; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
528; CHECK-NEXT:  # %bb.0:
529; CHECK-NEXT:    local.get 0
530; CHECK-NEXT:    i16x8.load8x8_u 8
531; CHECK-NEXT:    # fallthrough-return
532  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
533  %v = load <8 x i8>, <8 x i8>* %s
534  %v2 = zext <8 x i8> %v to <8 x i16>
535  ret <8 x i16> %v2
536}
537
538define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
539; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
540; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
541; CHECK-NEXT:  # %bb.0:
542; CHECK-NEXT:    local.get 0
543; CHECK-NEXT:    i16x8.load8x8_u 8
544; CHECK-NEXT:    # fallthrough-return
545  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
546  %v = load <8 x i8>, <8 x i8>* %s
547  ret <8 x i8> %v
548}
549
550define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
551; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
552; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
553; CHECK-NEXT:  # %bb.0:
554; CHECK-NEXT:    local.get 0
555; CHECK-NEXT:    i32.const -16
556; CHECK-NEXT:    i32.add
557; CHECK-NEXT:    v128.load 0
558; CHECK-NEXT:    # fallthrough-return
559  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
560  %v = load <8 x i16>, <8 x i16>* %s
561  ret <8 x i16> %v
562}
563
564define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
565; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
566; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
567; CHECK-NEXT:  # %bb.0:
568; CHECK-NEXT:    local.get 0
569; CHECK-NEXT:    i32.const -2
570; CHECK-NEXT:    i32.add
571; CHECK-NEXT:    v16x8.load_splat 0
572; CHECK-NEXT:    # fallthrough-return
573  %s = getelementptr inbounds i16, i16* %p, i32 -1
574  %e = load i16, i16* %s
575  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
576  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
577  ret <8 x i16> %v2
578}
579
580define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
581; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
582; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
583; CHECK-NEXT:  # %bb.0:
584; CHECK-NEXT:    local.get 0
585; CHECK-NEXT:    i32.const -8
586; CHECK-NEXT:    i32.add
587; CHECK-NEXT:    i16x8.load8x8_s 0
588; CHECK-NEXT:    # fallthrough-return
589  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
590  %v = load <8 x i8>, <8 x i8>* %s
591  %v2 = sext <8 x i8> %v to <8 x i16>
592  ret <8 x i16> %v2
593}
594
595define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
596; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
597; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
598; CHECK-NEXT:  # %bb.0:
599; CHECK-NEXT:    local.get 0
600; CHECK-NEXT:    i32.const -8
601; CHECK-NEXT:    i32.add
602; CHECK-NEXT:    i16x8.load8x8_u 0
603; CHECK-NEXT:    # fallthrough-return
604  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
605  %v = load <8 x i8>, <8 x i8>* %s
606  %v2 = zext <8 x i8> %v to <8 x i16>
607  ret <8 x i16> %v2
608}
609
610define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
611; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
612; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
613; CHECK-NEXT:  # %bb.0:
614; CHECK-NEXT:    local.get 0
615; CHECK-NEXT:    i32.const -8
616; CHECK-NEXT:    i32.add
617; CHECK-NEXT:    i16x8.load8x8_u 0
618; CHECK-NEXT:    # fallthrough-return
619  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
620  %v = load <8 x i8>, <8 x i8>* %s
621  ret <8 x i8> %v
622}
623
624define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
625; CHECK-LABEL: load_v8i16_with_unfolded_offset:
626; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
627; CHECK-NEXT:  # %bb.0:
628; CHECK-NEXT:    local.get 0
629; CHECK-NEXT:    i32.const 16
630; CHECK-NEXT:    i32.add
631; CHECK-NEXT:    v128.load 0
632; CHECK-NEXT:    # fallthrough-return
633  %q = ptrtoint <8 x i16>* %p to i32
634  %r = add nsw i32 %q, 16
635  %s = inttoptr i32 %r to <8 x i16>*
636  %v = load <8 x i16>, <8 x i16>* %s
637  ret <8 x i16> %v
638}
639
640define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
641; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
642; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
643; CHECK-NEXT:  # %bb.0:
644; CHECK-NEXT:    local.get 0
645; CHECK-NEXT:    i32.const 16
646; CHECK-NEXT:    i32.add
647; CHECK-NEXT:    v16x8.load_splat 0
648; CHECK-NEXT:    # fallthrough-return
649  %q = ptrtoint i16* %p to i32
650  %r = add nsw i32 %q, 16
651  %s = inttoptr i32 %r to i16*
652  %e = load i16, i16* %s
653  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
654  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
655  ret <8 x i16> %v2
656}
657
658define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
659; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
660; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
661; CHECK-NEXT:  # %bb.0:
662; CHECK-NEXT:    local.get 0
663; CHECK-NEXT:    i32.const 16
664; CHECK-NEXT:    i32.add
665; CHECK-NEXT:    i16x8.load8x8_s 0
666; CHECK-NEXT:    # fallthrough-return
667  %q = ptrtoint <8 x i8>* %p to i32
668  %r = add nsw i32 %q, 16
669  %s = inttoptr i32 %r to <8 x i8>*
670  %v = load <8 x i8>, <8 x i8>* %s
671  %v2 = sext <8 x i8> %v to <8 x i16>
672  ret <8 x i16> %v2
673}
674
675define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
676; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
677; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
678; CHECK-NEXT:  # %bb.0:
679; CHECK-NEXT:    local.get 0
680; CHECK-NEXT:    i32.const 16
681; CHECK-NEXT:    i32.add
682; CHECK-NEXT:    i16x8.load8x8_u 0
683; CHECK-NEXT:    # fallthrough-return
684  %q = ptrtoint <8 x i8>* %p to i32
685  %r = add nsw i32 %q, 16
686  %s = inttoptr i32 %r to <8 x i8>*
687  %v = load <8 x i8>, <8 x i8>* %s
688  %v2 = zext <8 x i8> %v to <8 x i16>
689  ret <8 x i16> %v2
690}
691
692define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
693; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
694; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
695; CHECK-NEXT:  # %bb.0:
696; CHECK-NEXT:    local.get 0
697; CHECK-NEXT:    i32.const 16
698; CHECK-NEXT:    i32.add
699; CHECK-NEXT:    i16x8.load8x8_u 0
700; CHECK-NEXT:    # fallthrough-return
701  %q = ptrtoint <8 x i8>* %p to i32
702  %r = add nsw i32 %q, 16
703  %s = inttoptr i32 %r to <8 x i8>*
704  %v = load <8 x i8>, <8 x i8>* %s
705  ret <8 x i8> %v
706}
707
708define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
709; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
710; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
711; CHECK-NEXT:  # %bb.0:
712; CHECK-NEXT:    local.get 0
713; CHECK-NEXT:    i32.const 16
714; CHECK-NEXT:    i32.add
715; CHECK-NEXT:    v128.load 0
716; CHECK-NEXT:    # fallthrough-return
717  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
718  %v = load <8 x i16>, <8 x i16>* %s
719  ret <8 x i16> %v
720}
721
722define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
723; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
724; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
725; CHECK-NEXT:  # %bb.0:
726; CHECK-NEXT:    local.get 0
727; CHECK-NEXT:    i32.const 2
728; CHECK-NEXT:    i32.add
729; CHECK-NEXT:    v16x8.load_splat 0
730; CHECK-NEXT:    # fallthrough-return
731  %s = getelementptr i16, i16* %p, i32 1
732  %e = load i16, i16* %s
733  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
734  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
735  ret <8 x i16> %v2
736}
737
738define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
739; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
740; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
741; CHECK-NEXT:  # %bb.0:
742; CHECK-NEXT:    local.get 0
743; CHECK-NEXT:    i32.const 8
744; CHECK-NEXT:    i32.add
745; CHECK-NEXT:    i16x8.load8x8_s 0
746; CHECK-NEXT:    # fallthrough-return
747  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
748  %v = load <8 x i8>, <8 x i8>* %s
749  %v2 = sext <8 x i8> %v to <8 x i16>
750  ret <8 x i16> %v2
751}
752
753define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
754; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
755; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
756; CHECK-NEXT:  # %bb.0:
757; CHECK-NEXT:    local.get 0
758; CHECK-NEXT:    i32.const 8
759; CHECK-NEXT:    i32.add
760; CHECK-NEXT:    i16x8.load8x8_u 0
761; CHECK-NEXT:    # fallthrough-return
762  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
763  %v = load <8 x i8>, <8 x i8>* %s
764  %v2 = zext <8 x i8> %v to <8 x i16>
765  ret <8 x i16> %v2
766}
767
768define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
769; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
770; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
771; CHECK-NEXT:  # %bb.0:
772; CHECK-NEXT:    local.get 0
773; CHECK-NEXT:    i32.const 8
774; CHECK-NEXT:    i32.add
775; CHECK-NEXT:    i16x8.load8x8_u 0
776; CHECK-NEXT:    # fallthrough-return
777  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
778  %v = load <8 x i8>, <8 x i8>* %s
779  ret <8 x i8> %v
780}
781
782define <8 x i16> @load_v8i16_from_numeric_address() {
783; CHECK-LABEL: load_v8i16_from_numeric_address:
784; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
785; CHECK-NEXT:  # %bb.0:
786; CHECK-NEXT:    i32.const 0
787; CHECK-NEXT:    v128.load 32
788; CHECK-NEXT:    # fallthrough-return
789  %s = inttoptr i32 32 to <8 x i16>*
790  %v = load <8 x i16>, <8 x i16>* %s
791  ret <8 x i16> %v
792}
793
794define <8 x i16> @load_splat_v8i16_from_numeric_address() {
795; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
796; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
797; CHECK-NEXT:  # %bb.0:
798; CHECK-NEXT:    i32.const 0
799; CHECK-NEXT:    v16x8.load_splat 32
800; CHECK-NEXT:    # fallthrough-return
801  %s = inttoptr i32 32 to i16*
802  %e = load i16, i16* %s
803  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
804  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
805  ret <8 x i16> %v2
806}
807
808define <8 x i16> @load_sext_v8i16_from_numeric_address() {
809; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
810; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
811; CHECK-NEXT:  # %bb.0:
812; CHECK-NEXT:    i32.const 0
813; CHECK-NEXT:    i16x8.load8x8_s 32
814; CHECK-NEXT:    # fallthrough-return
815  %s = inttoptr i32 32 to <8 x i8>*
816  %v = load <8 x i8>, <8 x i8>* %s
817  %v2 = sext <8 x i8> %v to <8 x i16>
818  ret <8 x i16> %v2
819}
820
821define <8 x i16> @load_zext_v8i16_from_numeric_address() {
822; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
823; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
824; CHECK-NEXT:  # %bb.0:
825; CHECK-NEXT:    i32.const 0
826; CHECK-NEXT:    i16x8.load8x8_u 32
827; CHECK-NEXT:    # fallthrough-return
828  %s = inttoptr i32 32 to <8 x i8>*
829  %v = load <8 x i8>, <8 x i8>* %s
830  %v2 = zext <8 x i8> %v to <8 x i16>
831  ret <8 x i16> %v2
832}
833
834define <8 x i8> @load_ext_v8i16_from_numeric_address() {
835; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
836; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
837; CHECK-NEXT:  # %bb.0:
838; CHECK-NEXT:    i32.const 0
839; CHECK-NEXT:    i16x8.load8x8_u 32
840; CHECK-NEXT:    # fallthrough-return
841  %s = inttoptr i32 32 to <8 x i8>*
842  %v = load <8 x i8>, <8 x i8>* %s
843  ret <8 x i8> %v
844}
845
846@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
847define <8 x i16> @load_v8i16_from_global_address() {
848; CHECK-LABEL: load_v8i16_from_global_address:
849; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
850; CHECK-NEXT:  # %bb.0:
851; CHECK-NEXT:    i32.const 0
852; CHECK-NEXT:    v128.load gv_v8i16
853; CHECK-NEXT:    # fallthrough-return
854  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
855  ret <8 x i16> %v
856}
857
858@gv_i16 = global i16 42
859define <8 x i16> @load_splat_v8i16_from_global_address() {
860; CHECK-LABEL: load_splat_v8i16_from_global_address:
861; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
862; CHECK-NEXT:  # %bb.0:
863; CHECK-NEXT:    i32.const 0
864; CHECK-NEXT:    v16x8.load_splat gv_i16
865; CHECK-NEXT:    # fallthrough-return
866  %e = load i16, i16* @gv_i16
867  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
868  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
869  ret <8 x i16> %v2
870}
871
872@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
873define <8 x i16> @load_sext_v8i16_from_global_address() {
874; CHECK-LABEL: load_sext_v8i16_from_global_address:
875; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
876; CHECK-NEXT:  # %bb.0:
877; CHECK-NEXT:    i32.const 0
878; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
879; CHECK-NEXT:    # fallthrough-return
880  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
881  %v2 = sext <8 x i8> %v to <8 x i16>
882  ret <8 x i16> %v2
883}
884
885define <8 x i16> @load_zext_v8i16_from_global_address() {
886; CHECK-LABEL: load_zext_v8i16_from_global_address:
887; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
888; CHECK-NEXT:  # %bb.0:
889; CHECK-NEXT:    i32.const 0
890; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
891; CHECK-NEXT:    # fallthrough-return
892  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
893  %v2 = zext <8 x i8> %v to <8 x i16>
894  ret <8 x i16> %v2
895}
896
897define <8 x i8> @load_ext_v8i16_from_global_address() {
898; CHECK-LABEL: load_ext_v8i16_from_global_address:
899; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
900; CHECK-NEXT:  # %bb.0:
901; CHECK-NEXT:    i32.const 0
902; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
903; CHECK-NEXT:    # fallthrough-return
904  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
905  ret <8 x i8> %v
906}
907
908
909define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
910; CHECK-LABEL: store_v8i16:
911; CHECK:         .functype store_v8i16 (v128, i32) -> ()
912; CHECK-NEXT:  # %bb.0:
913; CHECK-NEXT:    local.get 1
914; CHECK-NEXT:    local.get 0
915; CHECK-NEXT:    v128.store 0
916; CHECK-NEXT:    # fallthrough-return
917  store <8 x i16> %v , <8 x i16>* %p
918  ret void
919}
920
921define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
922; CHECK-LABEL: store_narrowing_v8i16:
923; CHECK:         .functype store_narrowing_v8i16 (v128, i32) -> ()
924; CHECK-NEXT:  # %bb.0:
925; CHECK-NEXT:    local.get 1
926; CHECK-NEXT:    i32.const 16711935
927; CHECK-NEXT:    i32x4.splat
928; CHECK-NEXT:    local.get 0
929; CHECK-NEXT:    v128.and
930; CHECK-NEXT:    local.get 0
931; CHECK-NEXT:    i8x16.narrow_i16x8_u
932; CHECK-NEXT:    i64x2.extract_lane 0
933; CHECK-NEXT:    i64.store 0
934; CHECK-NEXT:    # fallthrough-return
935  store <8 x i8> %v, <8 x i8>* %p
936  ret void
937}
938
939define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
940; CHECK-LABEL: store_v8i16_with_folded_offset:
941; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
942; CHECK-NEXT:  # %bb.0:
943; CHECK-NEXT:    local.get 1
944; CHECK-NEXT:    local.get 0
945; CHECK-NEXT:    v128.store 16
946; CHECK-NEXT:    # fallthrough-return
947  %q = ptrtoint <8 x i16>* %p to i32
948  %r = add nuw i32 %q, 16
949  %s = inttoptr i32 %r to <8 x i16>*
950  store <8 x i16> %v , <8 x i16>* %s
951  ret void
952}
953
954define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p) {
955; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
956; CHECK:         .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
957; CHECK-NEXT:  # %bb.0:
958; CHECK-NEXT:    local.get 1
959; CHECK-NEXT:    i32.const 16711935
960; CHECK-NEXT:    i32x4.splat
961; CHECK-NEXT:    local.get 0
962; CHECK-NEXT:    v128.and
963; CHECK-NEXT:    local.get 0
964; CHECK-NEXT:    i8x16.narrow_i16x8_u
965; CHECK-NEXT:    i64x2.extract_lane 0
966; CHECK-NEXT:    i64.store 16
967; CHECK-NEXT:    # fallthrough-return
968  %q = ptrtoint <8 x i8>* %p to i32
969  %r = add nuw i32 %q, 16
970  %s = inttoptr i32 %r to <8 x i8>*
971  store <8 x i8> %v , <8 x i8>* %s
972  ret void
973}
974
975define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
976; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
977; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
978; CHECK-NEXT:  # %bb.0:
979; CHECK-NEXT:    local.get 1
980; CHECK-NEXT:    local.get 0
981; CHECK-NEXT:    v128.store 16
982; CHECK-NEXT:    # fallthrough-return
983  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
984  store <8 x i16> %v , <8 x i16>* %s
985  ret void
986}
987
988define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
989; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
990; CHECK:         .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
991; CHECK-NEXT:  # %bb.0:
992; CHECK-NEXT:    local.get 1
993; CHECK-NEXT:    i32.const 16711935
994; CHECK-NEXT:    i32x4.splat
995; CHECK-NEXT:    local.get 0
996; CHECK-NEXT:    v128.and
997; CHECK-NEXT:    local.get 0
998; CHECK-NEXT:    i8x16.narrow_i16x8_u
999; CHECK-NEXT:    i64x2.extract_lane 0
1000; CHECK-NEXT:    i64.store 8
1001; CHECK-NEXT:    # fallthrough-return
1002  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
1003  store <8 x i8> %v , <8 x i8>* %s
1004  ret void
1005}
1006
1007define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
1008; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
1009; CHECK:         .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
1010; CHECK-NEXT:  # %bb.0:
1011; CHECK-NEXT:    local.get 1
1012; CHECK-NEXT:    i32.const -16
1013; CHECK-NEXT:    i32.add
1014; CHECK-NEXT:    local.get 0
1015; CHECK-NEXT:    v128.store 0
1016; CHECK-NEXT:    # fallthrough-return
1017  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
1018  store <8 x i16> %v , <8 x i16>* %s
1019  ret void
1020}
1021
1022define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, <8 x i8>* %p) {
1023; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
1024; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
1025; CHECK-NEXT:  # %bb.0:
1026; CHECK-NEXT:    local.get 1
1027; CHECK-NEXT:    i32.const -8
1028; CHECK-NEXT:    i32.add
1029; CHECK-NEXT:    i32.const 16711935
1030; CHECK-NEXT:    i32x4.splat
1031; CHECK-NEXT:    local.get 0
1032; CHECK-NEXT:    v128.and
1033; CHECK-NEXT:    local.get 0
1034; CHECK-NEXT:    i8x16.narrow_i16x8_u
1035; CHECK-NEXT:    i64x2.extract_lane 0
1036; CHECK-NEXT:    i64.store 0
1037; CHECK-NEXT:    # fallthrough-return
1038  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
1039  store <8 x i8> %v , <8 x i8>* %s
1040  ret void
1041}
1042
1043define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
1044; CHECK-LABEL: store_v8i16_with_unfolded_offset:
1045; CHECK:         .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
1046; CHECK-NEXT:  # %bb.0:
1047; CHECK-NEXT:    local.get 1
1048; CHECK-NEXT:    i32.const 16
1049; CHECK-NEXT:    i32.add
1050; CHECK-NEXT:    local.get 0
1051; CHECK-NEXT:    v128.store 0
1052; CHECK-NEXT:    # fallthrough-return
1053  %q = ptrtoint <8 x i16>* %p to i32
1054  %r = add nsw i32 %q, 16
1055  %s = inttoptr i32 %r to <8 x i16>*
1056  store <8 x i16> %v , <8 x i16>* %s
1057  ret void
1058}
1059
1060define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %p) {
1061; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
1062; CHECK:         .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
1063; CHECK-NEXT:  # %bb.0:
1064; CHECK-NEXT:    local.get 1
1065; CHECK-NEXT:    i32.const 16
1066; CHECK-NEXT:    i32.add
1067; CHECK-NEXT:    i32.const 16711935
1068; CHECK-NEXT:    i32x4.splat
1069; CHECK-NEXT:    local.get 0
1070; CHECK-NEXT:    v128.and
1071; CHECK-NEXT:    local.get 0
1072; CHECK-NEXT:    i8x16.narrow_i16x8_u
1073; CHECK-NEXT:    i64x2.extract_lane 0
1074; CHECK-NEXT:    i64.store 0
1075; CHECK-NEXT:    # fallthrough-return
1076  %q = ptrtoint <8 x i8>* %p to i32
1077  %r = add nsw i32 %q, 16
1078  %s = inttoptr i32 %r to <8 x i8>*
1079  store <8 x i8> %v , <8 x i8>* %s
1080  ret void
1081}
1082
1083define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
1084; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
1085; CHECK:         .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
1086; CHECK-NEXT:  # %bb.0:
1087; CHECK-NEXT:    local.get 1
1088; CHECK-NEXT:    i32.const 16
1089; CHECK-NEXT:    i32.add
1090; CHECK-NEXT:    local.get 0
1091; CHECK-NEXT:    v128.store 0
1092; CHECK-NEXT:    # fallthrough-return
1093  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
1094  store <8 x i16> %v , <8 x i16>* %s
1095  ret void
1096}
1097
1098define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8>* %p) {
1099; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
1100; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
1101; CHECK-NEXT:  # %bb.0:
1102; CHECK-NEXT:    local.get 1
1103; CHECK-NEXT:    i32.const 8
1104; CHECK-NEXT:    i32.add
1105; CHECK-NEXT:    i32.const 16711935
1106; CHECK-NEXT:    i32x4.splat
1107; CHECK-NEXT:    local.get 0
1108; CHECK-NEXT:    v128.and
1109; CHECK-NEXT:    local.get 0
1110; CHECK-NEXT:    i8x16.narrow_i16x8_u
1111; CHECK-NEXT:    i64x2.extract_lane 0
1112; CHECK-NEXT:    i64.store 0
1113; CHECK-NEXT:    # fallthrough-return
1114  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
1115  store <8 x i8> %v , <8 x i8>* %s
1116  ret void
1117}
1118
1119define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
1120; CHECK-LABEL: store_v8i16_to_numeric_address:
1121; CHECK:         .functype store_v8i16_to_numeric_address (v128) -> ()
1122; CHECK-NEXT:  # %bb.0:
1123; CHECK-NEXT:    i32.const 0
1124; CHECK-NEXT:    local.get 0
1125; CHECK-NEXT:    v128.store 32
1126; CHECK-NEXT:    # fallthrough-return
1127  %s = inttoptr i32 32 to <8 x i16>*
1128  store <8 x i16> %v , <8 x i16>* %s
1129  ret void
1130}
1131
1132define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p) {
1133; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
1134; CHECK:         .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
1135; CHECK-NEXT:  # %bb.0:
1136; CHECK-NEXT:    i32.const 0
1137; CHECK-NEXT:    i32.const 16711935
1138; CHECK-NEXT:    i32x4.splat
1139; CHECK-NEXT:    local.get 0
1140; CHECK-NEXT:    v128.and
1141; CHECK-NEXT:    local.get 0
1142; CHECK-NEXT:    i8x16.narrow_i16x8_u
1143; CHECK-NEXT:    i64x2.extract_lane 0
1144; CHECK-NEXT:    i64.store 32
1145; CHECK-NEXT:    # fallthrough-return
1146  %s = inttoptr i32 32 to <8 x i8>*
1147  store <8 x i8> %v , <8 x i8>* %s
1148  ret void
1149}
1150
1151define void @store_v8i16_to_global_address(<8 x i16> %v) {
1152; CHECK-LABEL: store_v8i16_to_global_address:
1153; CHECK:         .functype store_v8i16_to_global_address (v128) -> ()
1154; CHECK-NEXT:  # %bb.0:
1155; CHECK-NEXT:    i32.const 0
1156; CHECK-NEXT:    local.get 0
1157; CHECK-NEXT:    v128.store gv_v8i16
1158; CHECK-NEXT:    # fallthrough-return
1159  store <8 x i16> %v , <8 x i16>* @gv_v8i16
1160  ret void
1161}
1162
1163define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
1164; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
1165; CHECK:         .functype store_narrowing_v8i16_to_global_address (v128) -> ()
1166; CHECK-NEXT:  # %bb.0:
1167; CHECK-NEXT:    i32.const 0
1168; CHECK-NEXT:    i32.const 16711935
1169; CHECK-NEXT:    i32x4.splat
1170; CHECK-NEXT:    local.get 0
1171; CHECK-NEXT:    v128.and
1172; CHECK-NEXT:    local.get 0
1173; CHECK-NEXT:    i8x16.narrow_i16x8_u
1174; CHECK-NEXT:    i64x2.extract_lane 0
1175; CHECK-NEXT:    i64.store gv_v8i8
1176; CHECK-NEXT:    # fallthrough-return
1177  store <8 x i8> %v , <8 x i8>* @gv_v8i8
1178  ret void
1179}
1180
1181; ==============================================================================
1182; 4 x i32
1183; ==============================================================================
1184define <4 x i32> @load_v4i32(<4 x i32>* %p) {
1185; CHECK-LABEL: load_v4i32:
1186; CHECK:         .functype load_v4i32 (i32) -> (v128)
1187; CHECK-NEXT:  # %bb.0:
1188; CHECK-NEXT:    local.get 0
1189; CHECK-NEXT:    v128.load 0
1190; CHECK-NEXT:    # fallthrough-return
1191  %v = load <4 x i32>, <4 x i32>* %p
1192  ret <4 x i32> %v
1193}
1194
1195define <4 x i32> @load_splat_v4i32(i32* %addr) {
1196; CHECK-LABEL: load_splat_v4i32:
1197; CHECK:         .functype load_splat_v4i32 (i32) -> (v128)
1198; CHECK-NEXT:  # %bb.0:
1199; CHECK-NEXT:    local.get 0
1200; CHECK-NEXT:    v32x4.load_splat 0
1201; CHECK-NEXT:    # fallthrough-return
1202  %e = load i32, i32* %addr, align 4
1203  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1204  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1205  ret <4 x i32> %v2
1206}
1207
1208define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
1209; CHECK-LABEL: load_sext_v4i32:
1210; CHECK:         .functype load_sext_v4i32 (i32) -> (v128)
1211; CHECK-NEXT:  # %bb.0:
1212; CHECK-NEXT:    local.get 0
1213; CHECK-NEXT:    i32x4.load16x4_s 0
1214; CHECK-NEXT:    # fallthrough-return
1215  %v = load <4 x i16>, <4 x i16>* %p
1216  %v2 = sext <4 x i16> %v to <4 x i32>
1217  ret <4 x i32> %v2
1218}
1219
1220define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
1221; CHECK-LABEL: load_zext_v4i32:
1222; CHECK:         .functype load_zext_v4i32 (i32) -> (v128)
1223; CHECK-NEXT:  # %bb.0:
1224; CHECK-NEXT:    local.get 0
1225; CHECK-NEXT:    i32x4.load16x4_u 0
1226; CHECK-NEXT:    # fallthrough-return
1227  %v = load <4 x i16>, <4 x i16>* %p
1228  %v2 = zext <4 x i16> %v to <4 x i32>
1229  ret <4 x i32> %v2
1230}
1231
1232define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
1233; CHECK-LABEL: load_ext_v4i32:
1234; CHECK:         .functype load_ext_v4i32 (i32) -> (v128)
1235; CHECK-NEXT:  # %bb.0:
1236; CHECK-NEXT:    local.get 0
1237; CHECK-NEXT:    i32x4.load16x4_u 0
1238; CHECK-NEXT:    # fallthrough-return
1239  %v = load <4 x i16>, <4 x i16>* %p
1240  ret <4 x i16> %v
1241}
1242
1243define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
1244; CHECK-LABEL: load_v4i32_with_folded_offset:
1245; CHECK:         .functype load_v4i32_with_folded_offset (i32) -> (v128)
1246; CHECK-NEXT:  # %bb.0:
1247; CHECK-NEXT:    local.get 0
1248; CHECK-NEXT:    v128.load 16
1249; CHECK-NEXT:    # fallthrough-return
1250  %q = ptrtoint <4 x i32>* %p to i32
1251  %r = add nuw i32 %q, 16
1252  %s = inttoptr i32 %r to <4 x i32>*
1253  %v = load <4 x i32>, <4 x i32>* %s
1254  ret <4 x i32> %v
1255}
1256
1257define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
1258; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
1259; CHECK:         .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
1260; CHECK-NEXT:  # %bb.0:
1261; CHECK-NEXT:    local.get 0
1262; CHECK-NEXT:    v32x4.load_splat 16
1263; CHECK-NEXT:    # fallthrough-return
1264  %q = ptrtoint i32* %p to i32
1265  %r = add nuw i32 %q, 16
1266  %s = inttoptr i32 %r to i32*
1267  %e = load i32, i32* %s
1268  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1269  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1270  ret <4 x i32> %v2
1271}
1272
1273define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
1274; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
1275; CHECK:         .functype load_sext_v4i32_with_folded_offset (i32) -> (v128)
1276; CHECK-NEXT:  # %bb.0:
1277; CHECK-NEXT:    local.get 0
1278; CHECK-NEXT:    i32x4.load16x4_s 16
1279; CHECK-NEXT:    # fallthrough-return
1280  %q = ptrtoint <4 x i16>* %p to i32
1281  %r = add nuw i32 %q, 16
1282  %s = inttoptr i32 %r to <4 x i16>*
1283  %v = load <4 x i16>, <4 x i16>* %s
1284  %v2 = sext <4 x i16> %v to <4 x i32>
1285  ret <4 x i32> %v2
1286}
1287
1288define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
1289; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
1290; CHECK:         .functype load_zext_v4i32_with_folded_offset (i32) -> (v128)
1291; CHECK-NEXT:  # %bb.0:
1292; CHECK-NEXT:    local.get 0
1293; CHECK-NEXT:    i32x4.load16x4_u 16
1294; CHECK-NEXT:    # fallthrough-return
1295  %q = ptrtoint <4 x i16>* %p to i32
1296  %r = add nuw i32 %q, 16
1297  %s = inttoptr i32 %r to <4 x i16>*
1298  %v = load <4 x i16>, <4 x i16>* %s
1299  %v2 = zext <4 x i16> %v to <4 x i32>
1300  ret <4 x i32> %v2
1301}
1302
1303define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
1304; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
1305; CHECK:         .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
1306; CHECK-NEXT:  # %bb.0:
1307; CHECK-NEXT:    local.get 0
1308; CHECK-NEXT:    i32x4.load16x4_u 16
1309; CHECK-NEXT:    # fallthrough-return
1310  %q = ptrtoint <4 x i16>* %p to i32
1311  %r = add nuw i32 %q, 16
1312  %s = inttoptr i32 %r to <4 x i16>*
1313  %v = load <4 x i16>, <4 x i16>* %s
1314  ret <4 x i16> %v
1315}
1316
1317define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
1318; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
1319; CHECK:         .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
1320; CHECK-NEXT:  # %bb.0:
1321; CHECK-NEXT:    local.get 0
1322; CHECK-NEXT:    v128.load 16
1323; CHECK-NEXT:    # fallthrough-return
1324  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
1325  %v = load <4 x i32>, <4 x i32>* %s
1326  ret <4 x i32> %v
1327}
1328
1329define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
1330; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
1331; CHECK:         .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
1332; CHECK-NEXT:  # %bb.0:
1333; CHECK-NEXT:    local.get 0
1334; CHECK-NEXT:    v32x4.load_splat 4
1335; CHECK-NEXT:    # fallthrough-return
1336  %s = getelementptr inbounds i32, i32* %p, i32 1
1337  %e = load i32, i32* %s
1338  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1339  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1340  ret <4 x i32> %v2
1341}
1342
1343define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1344; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
1345; CHECK:         .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128)
1346; CHECK-NEXT:  # %bb.0:
1347; CHECK-NEXT:    local.get 0
1348; CHECK-NEXT:    i32x4.load16x4_s 8
1349; CHECK-NEXT:    # fallthrough-return
1350  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1351  %v = load <4 x i16>, <4 x i16>* %s
1352  %v2 = sext <4 x i16> %v to <4 x i32>
1353  ret <4 x i32> %v2
1354}
1355
1356define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1357; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
1358; CHECK:         .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128)
1359; CHECK-NEXT:  # %bb.0:
1360; CHECK-NEXT:    local.get 0
1361; CHECK-NEXT:    i32x4.load16x4_u 8
1362; CHECK-NEXT:    # fallthrough-return
1363  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1364  %v = load <4 x i16>, <4 x i16>* %s
1365  %v2 = zext <4 x i16> %v to <4 x i32>
1366  ret <4 x i32> %v2
1367}
1368
1369define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1370; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
1371; CHECK:         .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
1372; CHECK-NEXT:  # %bb.0:
1373; CHECK-NEXT:    local.get 0
1374; CHECK-NEXT:    i32x4.load16x4_u 8
1375; CHECK-NEXT:    # fallthrough-return
1376  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1377  %v = load <4 x i16>, <4 x i16>* %s
1378  ret <4 x i16> %v
1379}
1380
1381define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
1382; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
1383; CHECK:         .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1384; CHECK-NEXT:  # %bb.0:
1385; CHECK-NEXT:    local.get 0
1386; CHECK-NEXT:    i32.const -16
1387; CHECK-NEXT:    i32.add
1388; CHECK-NEXT:    v128.load 0
1389; CHECK-NEXT:    # fallthrough-return
1390  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1391  %v = load <4 x i32>, <4 x i32>* %s
1392  ret <4 x i32> %v
1393}
1394
1395define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
1396; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
1397; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1398; CHECK-NEXT:  # %bb.0:
1399; CHECK-NEXT:    local.get 0
1400; CHECK-NEXT:    i32.const -4
1401; CHECK-NEXT:    i32.add
1402; CHECK-NEXT:    v32x4.load_splat 0
1403; CHECK-NEXT:    # fallthrough-return
1404  %s = getelementptr inbounds i32, i32* %p, i32 -1
1405  %e = load i32, i32* %s
1406  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1407  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1408  ret <4 x i32> %v2
1409}
1410
1411define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1412; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
1413; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1414; CHECK-NEXT:  # %bb.0:
1415; CHECK-NEXT:    local.get 0
1416; CHECK-NEXT:    i32.const -8
1417; CHECK-NEXT:    i32.add
1418; CHECK-NEXT:    i32x4.load16x4_s 0
1419; CHECK-NEXT:    # fallthrough-return
1420  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1421  %v = load <4 x i16>, <4 x i16>* %s
1422  %v2 = sext <4 x i16> %v to <4 x i32>
1423  ret <4 x i32> %v2
1424}
1425
1426define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1427; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
1428; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1429; CHECK-NEXT:  # %bb.0:
1430; CHECK-NEXT:    local.get 0
1431; CHECK-NEXT:    i32.const -8
1432; CHECK-NEXT:    i32.add
1433; CHECK-NEXT:    i32x4.load16x4_u 0
1434; CHECK-NEXT:    # fallthrough-return
1435  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1436  %v = load <4 x i16>, <4 x i16>* %s
1437  %v2 = zext <4 x i16> %v to <4 x i32>
1438  ret <4 x i32> %v2
1439}
1440
1441define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1442; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
1443; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
1444; CHECK-NEXT:  # %bb.0:
1445; CHECK-NEXT:    local.get 0
1446; CHECK-NEXT:    i32.const -8
1447; CHECK-NEXT:    i32.add
1448; CHECK-NEXT:    i32x4.load16x4_u 0
1449; CHECK-NEXT:    # fallthrough-return
1450  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1451  %v = load <4 x i16>, <4 x i16>* %s
1452  ret <4 x i16> %v
1453}
1454
1455define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
1456; CHECK-LABEL: load_v4i32_with_unfolded_offset:
1457; CHECK:         .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
1458; CHECK-NEXT:  # %bb.0:
1459; CHECK-NEXT:    local.get 0
1460; CHECK-NEXT:    i32.const 16
1461; CHECK-NEXT:    i32.add
1462; CHECK-NEXT:    v128.load 0
1463; CHECK-NEXT:    # fallthrough-return
1464  %q = ptrtoint <4 x i32>* %p to i32
1465  %r = add nsw i32 %q, 16
1466  %s = inttoptr i32 %r to <4 x i32>*
1467  %v = load <4 x i32>, <4 x i32>* %s
1468  ret <4 x i32> %v
1469}
1470
1471define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
1472; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
1473; CHECK:         .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
1474; CHECK-NEXT:  # %bb.0:
1475; CHECK-NEXT:    local.get 0
1476; CHECK-NEXT:    i32.const 16
1477; CHECK-NEXT:    i32.add
1478; CHECK-NEXT:    v32x4.load_splat 0
1479; CHECK-NEXT:    # fallthrough-return
1480  %q = ptrtoint i32* %p to i32
1481  %r = add nsw i32 %q, 16
1482  %s = inttoptr i32 %r to i32*
1483  %e = load i32, i32* %s
1484  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1485  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1486  ret <4 x i32> %v2
1487}
1488
1489define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1490; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
1491; CHECK:         .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128)
1492; CHECK-NEXT:  # %bb.0:
1493; CHECK-NEXT:    local.get 0
1494; CHECK-NEXT:    i32.const 16
1495; CHECK-NEXT:    i32.add
1496; CHECK-NEXT:    i32x4.load16x4_s 0
1497; CHECK-NEXT:    # fallthrough-return
1498  %q = ptrtoint <4 x i16>* %p to i32
1499  %r = add nsw i32 %q, 16
1500  %s = inttoptr i32 %r to <4 x i16>*
1501  %v = load <4 x i16>, <4 x i16>* %s
1502  %v2 = sext <4 x i16> %v to <4 x i32>
1503  ret <4 x i32> %v2
1504}
1505
1506define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1507; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
1508; CHECK:         .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128)
1509; CHECK-NEXT:  # %bb.0:
1510; CHECK-NEXT:    local.get 0
1511; CHECK-NEXT:    i32.const 16
1512; CHECK-NEXT:    i32.add
1513; CHECK-NEXT:    i32x4.load16x4_u 0
1514; CHECK-NEXT:    # fallthrough-return
1515  %q = ptrtoint <4 x i16>* %p to i32
1516  %r = add nsw i32 %q, 16
1517  %s = inttoptr i32 %r to <4 x i16>*
1518  %v = load <4 x i16>, <4 x i16>* %s
1519  %v2 = zext <4 x i16> %v to <4 x i32>
1520  ret <4 x i32> %v2
1521}
1522
1523define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1524; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
1525; CHECK:         .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
1526; CHECK-NEXT:  # %bb.0:
1527; CHECK-NEXT:    local.get 0
1528; CHECK-NEXT:    i32.const 16
1529; CHECK-NEXT:    i32.add
1530; CHECK-NEXT:    i32x4.load16x4_u 0
1531; CHECK-NEXT:    # fallthrough-return
1532  %q = ptrtoint <4 x i16>* %p to i32
1533  %r = add nsw i32 %q, 16
1534  %s = inttoptr i32 %r to <4 x i16>*
1535  %v = load <4 x i16>, <4 x i16>* %s
1536  ret <4 x i16> %v
1537}
1538
1539define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
1540; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
1541; CHECK:         .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1542; CHECK-NEXT:  # %bb.0:
1543; CHECK-NEXT:    local.get 0
1544; CHECK-NEXT:    i32.const 16
1545; CHECK-NEXT:    i32.add
1546; CHECK-NEXT:    v128.load 0
1547; CHECK-NEXT:    # fallthrough-return
1548  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
1549  %v = load <4 x i32>, <4 x i32>* %s
1550  ret <4 x i32> %v
1551}
1552
1553define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
1554; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
1555; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1556; CHECK-NEXT:  # %bb.0:
1557; CHECK-NEXT:    local.get 0
1558; CHECK-NEXT:    i32.const 4
1559; CHECK-NEXT:    i32.add
1560; CHECK-NEXT:    v32x4.load_splat 0
1561; CHECK-NEXT:    # fallthrough-return
1562  %s = getelementptr i32, i32* %p, i32 1
1563  %e = load i32, i32* %s
1564  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1565  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1566  ret <4 x i32> %v2
1567}
1568
1569define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1570; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
1571; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1572; CHECK-NEXT:  # %bb.0:
1573; CHECK-NEXT:    local.get 0
1574; CHECK-NEXT:    i32.const 8
1575; CHECK-NEXT:    i32.add
1576; CHECK-NEXT:    i32x4.load16x4_s 0
1577; CHECK-NEXT:    # fallthrough-return
1578  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1579  %v = load <4 x i16>, <4 x i16>* %s
1580  %v2 = sext <4 x i16> %v to <4 x i32>
1581  ret <4 x i32> %v2
1582}
1583
1584define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1585; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
1586; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1587; CHECK-NEXT:  # %bb.0:
1588; CHECK-NEXT:    local.get 0
1589; CHECK-NEXT:    i32.const 8
1590; CHECK-NEXT:    i32.add
1591; CHECK-NEXT:    i32x4.load16x4_u 0
1592; CHECK-NEXT:    # fallthrough-return
1593  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1594  %v = load <4 x i16>, <4 x i16>* %s
1595  %v2 = zext <4 x i16> %v to <4 x i32>
1596  ret <4 x i32> %v2
1597}
1598
1599define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1600; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
1601; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1602; CHECK-NEXT:  # %bb.0:
1603; CHECK-NEXT:    local.get 0
1604; CHECK-NEXT:    i32.const 8
1605; CHECK-NEXT:    i32.add
1606; CHECK-NEXT:    i32x4.load16x4_u 0
1607; CHECK-NEXT:    # fallthrough-return
1608  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1609  %v = load <4 x i16>, <4 x i16>* %s
1610  ret <4 x i16> %v
1611}
1612
1613define <4 x i32> @load_v4i32_from_numeric_address() {
1614; CHECK-LABEL: load_v4i32_from_numeric_address:
1615; CHECK:         .functype load_v4i32_from_numeric_address () -> (v128)
1616; CHECK-NEXT:  # %bb.0:
1617; CHECK-NEXT:    i32.const 0
1618; CHECK-NEXT:    v128.load 32
1619; CHECK-NEXT:    # fallthrough-return
1620  %s = inttoptr i32 32 to <4 x i32>*
1621  %v = load <4 x i32>, <4 x i32>* %s
1622  ret <4 x i32> %v
1623}
1624
1625define <4 x i32> @load_splat_v4i32_from_numeric_address() {
1626; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
1627; CHECK:         .functype load_splat_v4i32_from_numeric_address () -> (v128)
1628; CHECK-NEXT:  # %bb.0:
1629; CHECK-NEXT:    i32.const 0
1630; CHECK-NEXT:    v32x4.load_splat 32
1631; CHECK-NEXT:    # fallthrough-return
1632  %s = inttoptr i32 32 to i32*
1633  %e = load i32, i32* %s
1634  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1635  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1636  ret <4 x i32> %v2
1637}
1638
1639define <4 x i32> @load_sext_v4i32_from_numeric_address() {
1640; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
1641; CHECK:         .functype load_sext_v4i32_from_numeric_address () -> (v128)
1642; CHECK-NEXT:  # %bb.0:
1643; CHECK-NEXT:    i32.const 0
1644; CHECK-NEXT:    i32x4.load16x4_s 32
1645; CHECK-NEXT:    # fallthrough-return
1646  %s = inttoptr i32 32 to <4 x i16>*
1647  %v = load <4 x i16>, <4 x i16>* %s
1648  %v2 = sext <4 x i16> %v to <4 x i32>
1649  ret <4 x i32> %v2
1650}
1651
1652define <4 x i32> @load_zext_v4i32_from_numeric_address() {
1653; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
1654; CHECK:         .functype load_zext_v4i32_from_numeric_address () -> (v128)
1655; CHECK-NEXT:  # %bb.0:
1656; CHECK-NEXT:    i32.const 0
1657; CHECK-NEXT:    i32x4.load16x4_u 32
1658; CHECK-NEXT:    # fallthrough-return
1659  %s = inttoptr i32 32 to <4 x i16>*
1660  %v = load <4 x i16>, <4 x i16>* %s
1661  %v2 = zext <4 x i16> %v to <4 x i32>
1662  ret <4 x i32> %v2
1663}
1664
1665define <4 x i16> @load_ext_v4i32_from_numeric_address() {
1666; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
1667; CHECK:         .functype load_ext_v4i32_from_numeric_address () -> (v128)
1668; CHECK-NEXT:  # %bb.0:
1669; CHECK-NEXT:    i32.const 0
1670; CHECK-NEXT:    i32x4.load16x4_u 32
1671; CHECK-NEXT:    # fallthrough-return
1672  %s = inttoptr i32 32 to <4 x i16>*
1673  %v = load <4 x i16>, <4 x i16>* %s
1674  ret <4 x i16> %v
1675}
1676
1677@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
1678define <4 x i32> @load_v4i32_from_global_address() {
1679; CHECK-LABEL: load_v4i32_from_global_address:
1680; CHECK:         .functype load_v4i32_from_global_address () -> (v128)
1681; CHECK-NEXT:  # %bb.0:
1682; CHECK-NEXT:    i32.const 0
1683; CHECK-NEXT:    v128.load gv_v4i32
1684; CHECK-NEXT:    # fallthrough-return
1685  %v = load <4 x i32>, <4 x i32>* @gv_v4i32
1686  ret <4 x i32> %v
1687}
1688
1689@gv_i32 = global i32 42
1690define <4 x i32> @load_splat_v4i32_from_global_address() {
1691; CHECK-LABEL: load_splat_v4i32_from_global_address:
1692; CHECK:         .functype load_splat_v4i32_from_global_address () -> (v128)
1693; CHECK-NEXT:  # %bb.0:
1694; CHECK-NEXT:    i32.const 0
1695; CHECK-NEXT:    v32x4.load_splat gv_i32
1696; CHECK-NEXT:    # fallthrough-return
1697  %e = load i32, i32* @gv_i32
1698  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1699  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1700  ret <4 x i32> %v2
1701}
1702
1703@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
1704define <4 x i32> @load_sext_v4i32_from_global_address() {
1705; CHECK-LABEL: load_sext_v4i32_from_global_address:
1706; CHECK:         .functype load_sext_v4i32_from_global_address () -> (v128)
1707; CHECK-NEXT:  # %bb.0:
1708; CHECK-NEXT:    i32.const 0
1709; CHECK-NEXT:    i32x4.load16x4_s gv_v4i16
1710; CHECK-NEXT:    # fallthrough-return
1711  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1712  %v2 = sext <4 x i16> %v to <4 x i32>
1713  ret <4 x i32> %v2
1714}
1715
1716define <4 x i32> @load_zext_v4i32_from_global_address() {
1717; CHECK-LABEL: load_zext_v4i32_from_global_address:
1718; CHECK:         .functype load_zext_v4i32_from_global_address () -> (v128)
1719; CHECK-NEXT:  # %bb.0:
1720; CHECK-NEXT:    i32.const 0
1721; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
1722; CHECK-NEXT:    # fallthrough-return
1723  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1724  %v2 = zext <4 x i16> %v to <4 x i32>
1725  ret <4 x i32> %v2
1726}
1727
1728define <4 x i16> @load_ext_v4i32_from_global_address() {
1729; CHECK-LABEL: load_ext_v4i32_from_global_address:
1730; CHECK:         .functype load_ext_v4i32_from_global_address () -> (v128)
1731; CHECK-NEXT:  # %bb.0:
1732; CHECK-NEXT:    i32.const 0
1733; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
1734; CHECK-NEXT:    # fallthrough-return
1735  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1736  ret <4 x i16> %v
1737}
1738
1739define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
1740; CHECK-LABEL: store_v4i32:
1741; CHECK:         .functype store_v4i32 (v128, i32) -> ()
1742; CHECK-NEXT:  # %bb.0:
1743; CHECK-NEXT:    local.get 1
1744; CHECK-NEXT:    local.get 0
1745; CHECK-NEXT:    v128.store 0
1746; CHECK-NEXT:    # fallthrough-return
1747  store <4 x i32> %v , <4 x i32>* %p
1748  ret void
1749}
1750
1751define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
1752; CHECK-LABEL: store_narrowing_v4i32:
1753; CHECK:         .functype store_narrowing_v4i32 (v128, i32) -> ()
1754; CHECK-NEXT:  # %bb.0:
1755; CHECK-NEXT:    local.get 1
1756; CHECK-NEXT:    i32.const 65535
1757; CHECK-NEXT:    i32x4.splat
1758; CHECK-NEXT:    local.get 0
1759; CHECK-NEXT:    v128.and
1760; CHECK-NEXT:    local.get 0
1761; CHECK-NEXT:    i16x8.narrow_i32x4_u
1762; CHECK-NEXT:    i64x2.extract_lane 0
1763; CHECK-NEXT:    i64.store 0
1764; CHECK-NEXT:    # fallthrough-return
1765  store <4 x i16> %v , <4 x i16>* %p
1766  ret void
1767}
1768
1769define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
1770; CHECK-LABEL: store_v4i32_with_folded_offset:
1771; CHECK:         .functype store_v4i32_with_folded_offset (v128, i32) -> ()
1772; CHECK-NEXT:  # %bb.0:
1773; CHECK-NEXT:    local.get 1
1774; CHECK-NEXT:    local.get 0
1775; CHECK-NEXT:    v128.store 16
1776; CHECK-NEXT:    # fallthrough-return
1777  %q = ptrtoint <4 x i32>* %p to i32
1778  %r = add nuw i32 %q, 16
1779  %s = inttoptr i32 %r to <4 x i32>*
1780  store <4 x i32> %v , <4 x i32>* %s
1781  ret void
1782}
1783
1784define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %p) {
1785; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset:
1786; CHECK:         .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
1787; CHECK-NEXT:  # %bb.0:
1788; CHECK-NEXT:    local.get 1
1789; CHECK-NEXT:    i32.const 65535
1790; CHECK-NEXT:    i32x4.splat
1791; CHECK-NEXT:    local.get 0
1792; CHECK-NEXT:    v128.and
1793; CHECK-NEXT:    local.get 0
1794; CHECK-NEXT:    i16x8.narrow_i32x4_u
1795; CHECK-NEXT:    i64x2.extract_lane 0
1796; CHECK-NEXT:    i64.store 16
1797; CHECK-NEXT:    # fallthrough-return
1798  %q = ptrtoint <4 x i16>* %p to i32
1799  %r = add nuw i32 %q, 16
1800  %s = inttoptr i32 %r to <4 x i16>*
1801  store <4 x i16> %v , <4 x i16>* %s
1802  ret void
1803}
1804
1805define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1806; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
1807; CHECK:         .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
1808; CHECK-NEXT:  # %bb.0:
1809; CHECK-NEXT:    local.get 1
1810; CHECK-NEXT:    local.get 0
1811; CHECK-NEXT:    v128.store 16
1812; CHECK-NEXT:    # fallthrough-return
1813  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
1814  store <4 x i32> %v , <4 x i32>* %s
1815  ret void
1816}
1817
1818define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
1819; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset:
1820; CHECK:         .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
1821; CHECK-NEXT:  # %bb.0:
1822; CHECK-NEXT:    local.get 1
1823; CHECK-NEXT:    i32.const 65535
1824; CHECK-NEXT:    i32x4.splat
1825; CHECK-NEXT:    local.get 0
1826; CHECK-NEXT:    v128.and
1827; CHECK-NEXT:    local.get 0
1828; CHECK-NEXT:    i16x8.narrow_i32x4_u
1829; CHECK-NEXT:    i64x2.extract_lane 0
1830; CHECK-NEXT:    i64.store 8
1831; CHECK-NEXT:    # fallthrough-return
1832  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1833  store <4 x i16> %v , <4 x i16>* %s
1834  ret void
1835}
1836
1837define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
1838; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
1839; CHECK:         .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
1840; CHECK-NEXT:  # %bb.0:
1841; CHECK-NEXT:    local.get 1
1842; CHECK-NEXT:    i32.const -16
1843; CHECK-NEXT:    i32.add
1844; CHECK-NEXT:    local.get 0
1845; CHECK-NEXT:    v128.store 0
1846; CHECK-NEXT:    # fallthrough-return
1847  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1848  store <4 x i32> %v , <4 x i32>* %s
1849  ret void
1850}
1851
1852define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, <4 x i16>* %p) {
1853; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset:
1854; CHECK:         .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
1855; CHECK-NEXT:  # %bb.0:
1856; CHECK-NEXT:    local.get 1
1857; CHECK-NEXT:    i32.const -8
1858; CHECK-NEXT:    i32.add
1859; CHECK-NEXT:    i32.const 65535
1860; CHECK-NEXT:    i32x4.splat
1861; CHECK-NEXT:    local.get 0
1862; CHECK-NEXT:    v128.and
1863; CHECK-NEXT:    local.get 0
1864; CHECK-NEXT:    i16x8.narrow_i32x4_u
1865; CHECK-NEXT:    i64x2.extract_lane 0
1866; CHECK-NEXT:    i64.store 0
1867; CHECK-NEXT:    # fallthrough-return
1868  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1869  store <4 x i16> %v , <4 x i16>* %s
1870  ret void
1871}
1872
1873define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
1874; CHECK-LABEL: store_v4i32_with_unfolded_offset:
1875; CHECK:         .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
1876; CHECK-NEXT:  # %bb.0:
1877; CHECK-NEXT:    local.get 1
1878; CHECK-NEXT:    i32.const 16
1879; CHECK-NEXT:    i32.add
1880; CHECK-NEXT:    local.get 0
1881; CHECK-NEXT:    v128.store 0
1882; CHECK-NEXT:    # fallthrough-return
1883  %q = ptrtoint <4 x i32>* %p to i32
1884  %r = add nsw i32 %q, 16
1885  %s = inttoptr i32 %r to <4 x i32>*
1886  store <4 x i32> %v , <4 x i32>* %s
1887  ret void
1888}
1889
1890define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>* %p) {
1891; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset:
1892; CHECK:         .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> ()
1893; CHECK-NEXT:  # %bb.0:
1894; CHECK-NEXT:    local.get 1
1895; CHECK-NEXT:    i32.const 16
1896; CHECK-NEXT:    i32.add
1897; CHECK-NEXT:    i32.const 65535
1898; CHECK-NEXT:    i32x4.splat
1899; CHECK-NEXT:    local.get 0
1900; CHECK-NEXT:    v128.and
1901; CHECK-NEXT:    local.get 0
1902; CHECK-NEXT:    i16x8.narrow_i32x4_u
1903; CHECK-NEXT:    i64x2.extract_lane 0
1904; CHECK-NEXT:    i64.store 0
1905; CHECK-NEXT:    # fallthrough-return
1906  %q = ptrtoint <4 x i16>* %p to i32
1907  %r = add nsw i32 %q, 16
1908  %s = inttoptr i32 %r to <4 x i16>*
1909  store <4 x i16> %v , <4 x i16>* %s
1910  ret void
1911}
1912
1913define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1914; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
1915; CHECK:         .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
1916; CHECK-NEXT:  # %bb.0:
1917; CHECK-NEXT:    local.get 1
1918; CHECK-NEXT:    i32.const 16
1919; CHECK-NEXT:    i32.add
1920; CHECK-NEXT:    local.get 0
1921; CHECK-NEXT:    v128.store 0
1922; CHECK-NEXT:    # fallthrough-return
1923  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
1924  store <4 x i32> %v , <4 x i32>* %s
1925  ret void
1926}
1927
1928define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i16>* %p) {
1929; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset:
1930; CHECK:         .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
1931; CHECK-NEXT:  # %bb.0:
1932; CHECK-NEXT:    local.get 1
1933; CHECK-NEXT:    i32.const 8
1934; CHECK-NEXT:    i32.add
1935; CHECK-NEXT:    i32.const 65535
1936; CHECK-NEXT:    i32x4.splat
1937; CHECK-NEXT:    local.get 0
1938; CHECK-NEXT:    v128.and
1939; CHECK-NEXT:    local.get 0
1940; CHECK-NEXT:    i16x8.narrow_i32x4_u
1941; CHECK-NEXT:    i64x2.extract_lane 0
1942; CHECK-NEXT:    i64.store 0
1943; CHECK-NEXT:    # fallthrough-return
1944  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1945  store <4 x i16> %v , <4 x i16>* %s
1946  ret void
1947}
1948
1949define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
1950; CHECK-LABEL: store_v4i32_to_numeric_address:
1951; CHECK:         .functype store_v4i32_to_numeric_address (v128) -> ()
1952; CHECK-NEXT:  # %bb.0:
1953; CHECK-NEXT:    i32.const 0
1954; CHECK-NEXT:    local.get 0
1955; CHECK-NEXT:    v128.store 32
1956; CHECK-NEXT:    # fallthrough-return
1957  %s = inttoptr i32 32 to <4 x i32>*
1958  store <4 x i32> %v , <4 x i32>* %s
1959  ret void
1960}
1961
1962define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
1963; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
1964; CHECK:         .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
1965; CHECK-NEXT:  # %bb.0:
1966; CHECK-NEXT:    i32.const 0
1967; CHECK-NEXT:    i32.const 65535
1968; CHECK-NEXT:    i32x4.splat
1969; CHECK-NEXT:    local.get 0
1970; CHECK-NEXT:    v128.and
1971; CHECK-NEXT:    local.get 0
1972; CHECK-NEXT:    i16x8.narrow_i32x4_u
1973; CHECK-NEXT:    i64x2.extract_lane 0
1974; CHECK-NEXT:    i64.store 32
1975; CHECK-NEXT:    # fallthrough-return
1976  %s = inttoptr i32 32 to <4 x i16>*
1977  store <4 x i16> %v , <4 x i16>* %s
1978  ret void
1979}
1980
1981define void @store_v4i32_to_global_address(<4 x i32> %v) {
1982; CHECK-LABEL: store_v4i32_to_global_address:
1983; CHECK:         .functype store_v4i32_to_global_address (v128) -> ()
1984; CHECK-NEXT:  # %bb.0:
1985; CHECK-NEXT:    i32.const 0
1986; CHECK-NEXT:    local.get 0
1987; CHECK-NEXT:    v128.store gv_v4i32
1988; CHECK-NEXT:    # fallthrough-return
1989  store <4 x i32> %v , <4 x i32>* @gv_v4i32
1990  ret void
1991}
1992
1993define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
1994; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
1995; CHECK:         .functype store_narrowing_v4i32_to_global_address (v128) -> ()
1996; CHECK-NEXT:  # %bb.0:
1997; CHECK-NEXT:    i32.const 0
1998; CHECK-NEXT:    i32.const 65535
1999; CHECK-NEXT:    i32x4.splat
2000; CHECK-NEXT:    local.get 0
2001; CHECK-NEXT:    v128.and
2002; CHECK-NEXT:    local.get 0
2003; CHECK-NEXT:    i16x8.narrow_i32x4_u
2004; CHECK-NEXT:    i64x2.extract_lane 0
2005; CHECK-NEXT:    i64.store gv_v4i16
2006; CHECK-NEXT:    # fallthrough-return
2007  store <4 x i16> %v , <4 x i16>* @gv_v4i16
2008  ret void
2009}
2010
2011; ==============================================================================
2012; 2 x i64
2013; ==============================================================================
2014define <2 x i64> @load_v2i64(<2 x i64>* %p) {
2015; CHECK-LABEL: load_v2i64:
2016; CHECK:         .functype load_v2i64 (i32) -> (v128)
2017; CHECK-NEXT:  # %bb.0:
2018; CHECK-NEXT:    local.get 0
2019; CHECK-NEXT:    v128.load 0
2020; CHECK-NEXT:    # fallthrough-return
2021  %v = load <2 x i64>, <2 x i64>* %p
2022  ret <2 x i64> %v
2023}
2024
2025define <2 x i64> @load_splat_v2i64(i64* %p) {
2026; CHECK-LABEL: load_splat_v2i64:
2027; CHECK:         .functype load_splat_v2i64 (i32) -> (v128)
2028; CHECK-NEXT:  # %bb.0:
2029; CHECK-NEXT:    local.get 0
2030; CHECK-NEXT:    v64x2.load_splat 0
2031; CHECK-NEXT:    # fallthrough-return
2032  %e = load i64, i64* %p
2033  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2034  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2035  ret <2 x i64> %v2
2036}
2037
2038define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
2039; CHECK-LABEL: load_sext_v2i64:
2040; CHECK:         .functype load_sext_v2i64 (i32) -> (v128)
2041; CHECK-NEXT:  # %bb.0:
2042; CHECK-NEXT:    local.get 0
2043; CHECK-NEXT:    i64x2.load32x2_s 0
2044; CHECK-NEXT:    # fallthrough-return
2045  %v = load <2 x i32>, <2 x i32>* %p
2046  %v2 = sext <2 x i32> %v to <2 x i64>
2047  ret <2 x i64> %v2
2048}
2049
2050define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
2051; CHECK-LABEL: load_zext_v2i64:
2052; CHECK:         .functype load_zext_v2i64 (i32) -> (v128)
2053; CHECK-NEXT:  # %bb.0:
2054; CHECK-NEXT:    local.get 0
2055; CHECK-NEXT:    i64x2.load32x2_u 0
2056; CHECK-NEXT:    # fallthrough-return
2057  %v = load <2 x i32>, <2 x i32>* %p
2058  %v2 = zext <2 x i32> %v to <2 x i64>
2059  ret <2 x i64> %v2
2060}
2061
2062define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
2063; CHECK-LABEL: load_ext_v2i64:
2064; CHECK:         .functype load_ext_v2i64 (i32) -> (v128)
2065; CHECK-NEXT:  # %bb.0:
2066; CHECK-NEXT:    local.get 0
2067; CHECK-NEXT:    i64x2.load32x2_u 0
2068; CHECK-NEXT:    # fallthrough-return
2069  %v = load <2 x i32>, <2 x i32>* %p
2070  ret <2 x i32> %v
2071}
2072
2073define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
2074; CHECK-LABEL: load_v2i64_with_folded_offset:
2075; CHECK:         .functype load_v2i64_with_folded_offset (i32) -> (v128)
2076; CHECK-NEXT:  # %bb.0:
2077; CHECK-NEXT:    local.get 0
2078; CHECK-NEXT:    v128.load 16
2079; CHECK-NEXT:    # fallthrough-return
2080  %q = ptrtoint <2 x i64>* %p to i32
2081  %r = add nuw i32 %q, 16
2082  %s = inttoptr i32 %r to <2 x i64>*
2083  %v = load <2 x i64>, <2 x i64>* %s
2084  ret <2 x i64> %v
2085}
2086
2087define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
2088; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
2089; CHECK:         .functype load_splat_v2i64_with_folded_offset (i32) -> (v128)
2090; CHECK-NEXT:  # %bb.0:
2091; CHECK-NEXT:    local.get 0
2092; CHECK-NEXT:    v64x2.load_splat 16
2093; CHECK-NEXT:    # fallthrough-return
2094  %q = ptrtoint i64* %p to i32
2095  %r = add nuw i32 %q, 16
2096  %s = inttoptr i32 %r to i64*
2097  %e = load i64, i64* %s
2098  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2099  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2100  ret <2 x i64> %v2
2101}
2102
2103define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
2104; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
2105; CHECK:         .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
2106; CHECK-NEXT:  # %bb.0:
2107; CHECK-NEXT:    local.get 0
2108; CHECK-NEXT:    i64x2.load32x2_s 16
2109; CHECK-NEXT:    # fallthrough-return
2110  %q = ptrtoint <2 x i32>* %p to i32
2111  %r = add nuw i32 %q, 16
2112  %s = inttoptr i32 %r to <2 x i32>*
2113  %v = load <2 x i32>, <2 x i32>* %s
2114  %v2 = sext <2 x i32> %v to <2 x i64>
2115  ret <2 x i64> %v2
2116}
2117
2118define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
2119; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
2120; CHECK:         .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
2121; CHECK-NEXT:  # %bb.0:
2122; CHECK-NEXT:    local.get 0
2123; CHECK-NEXT:    i64x2.load32x2_u 16
2124; CHECK-NEXT:    # fallthrough-return
2125  %q = ptrtoint <2 x i32>* %p to i32
2126  %r = add nuw i32 %q, 16
2127  %s = inttoptr i32 %r to <2 x i32>*
2128  %v = load <2 x i32>, <2 x i32>* %s
2129  %v2 = zext <2 x i32> %v to <2 x i64>
2130  ret <2 x i64> %v2
2131}
2132
2133define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
2134; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
2135; CHECK:         .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
2136; CHECK-NEXT:  # %bb.0:
2137; CHECK-NEXT:    local.get 0
2138; CHECK-NEXT:    i64x2.load32x2_u 16
2139; CHECK-NEXT:    # fallthrough-return
2140  %q = ptrtoint <2 x i32>* %p to i32
2141  %r = add nuw i32 %q, 16
2142  %s = inttoptr i32 %r to <2 x i32>*
2143  %v = load <2 x i32>, <2 x i32>* %s
2144  ret <2 x i32> %v
2145}
2146
2147define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
2148; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
2149; CHECK:         .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
2150; CHECK-NEXT:  # %bb.0:
2151; CHECK-NEXT:    local.get 0
2152; CHECK-NEXT:    v128.load 16
2153; CHECK-NEXT:    # fallthrough-return
2154  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
2155  %v = load <2 x i64>, <2 x i64>* %s
2156  ret <2 x i64> %v
2157}
2158
2159define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
2160; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
2161; CHECK:         .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
2162; CHECK-NEXT:  # %bb.0:
2163; CHECK-NEXT:    local.get 0
2164; CHECK-NEXT:    v64x2.load_splat 8
2165; CHECK-NEXT:    # fallthrough-return
2166  %s = getelementptr inbounds i64, i64* %p, i32 1
2167  %e = load i64, i64* %s
2168  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2169  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2170  ret <2 x i64> %v2
2171}
2172
2173define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
2174; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
2175; CHECK:         .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
2176; CHECK-NEXT:  # %bb.0:
2177; CHECK-NEXT:    local.get 0
2178; CHECK-NEXT:    i64x2.load32x2_s 8
2179; CHECK-NEXT:    # fallthrough-return
2180  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
2181  %v = load <2 x i32>, <2 x i32>* %s
2182  %v2 = sext <2 x i32> %v to <2 x i64>
2183  ret <2 x i64> %v2
2184}
2185
2186define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
2187; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
2188; CHECK:         .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
2189; CHECK-NEXT:  # %bb.0:
2190; CHECK-NEXT:    local.get 0
2191; CHECK-NEXT:    i64x2.load32x2_u 8
2192; CHECK-NEXT:    # fallthrough-return
2193  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
2194  %v = load <2 x i32>, <2 x i32>* %s
2195  %v2 = zext <2 x i32> %v to <2 x i64>
2196  ret <2 x i64> %v2
2197}
2198
2199define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
2200; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
2201; CHECK:         .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
2202; CHECK-NEXT:  # %bb.0:
2203; CHECK-NEXT:    local.get 0
2204; CHECK-NEXT:    i64x2.load32x2_u 8
2205; CHECK-NEXT:    # fallthrough-return
2206  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
2207  %v = load <2 x i32>, <2 x i32>* %s
2208  ret <2 x i32> %v
2209}
2210
2211define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
2212; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
2213; CHECK:         .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2214; CHECK-NEXT:  # %bb.0:
2215; CHECK-NEXT:    local.get 0
2216; CHECK-NEXT:    i32.const -16
2217; CHECK-NEXT:    i32.add
2218; CHECK-NEXT:    v128.load 0
2219; CHECK-NEXT:    # fallthrough-return
2220  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
2221  %v = load <2 x i64>, <2 x i64>* %s
2222  ret <2 x i64> %v
2223}
2224
2225define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
2226; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
2227; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2228; CHECK-NEXT:  # %bb.0:
2229; CHECK-NEXT:    local.get 0
2230; CHECK-NEXT:    i32.const -8
2231; CHECK-NEXT:    i32.add
2232; CHECK-NEXT:    v64x2.load_splat 0
2233; CHECK-NEXT:    # fallthrough-return
2234  %s = getelementptr inbounds i64, i64* %p, i32 -1
2235  %e = load i64, i64* %s
2236  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2237  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2238  ret <2 x i64> %v2
2239}
2240
2241define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
2242; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
2243; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2244; CHECK-NEXT:  # %bb.0:
2245; CHECK-NEXT:    local.get 0
2246; CHECK-NEXT:    i32.const -8
2247; CHECK-NEXT:    i32.add
2248; CHECK-NEXT:    i64x2.load32x2_s 0
2249; CHECK-NEXT:    # fallthrough-return
2250  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
2251  %v = load <2 x i32>, <2 x i32>* %s
2252  %v2 = sext <2 x i32> %v to <2 x i64>
2253  ret <2 x i64> %v2
2254}
2255
2256define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
2257; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
2258; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2259; CHECK-NEXT:  # %bb.0:
2260; CHECK-NEXT:    local.get 0
2261; CHECK-NEXT:    i32.const -8
2262; CHECK-NEXT:    i32.add
2263; CHECK-NEXT:    i64x2.load32x2_u 0
2264; CHECK-NEXT:    # fallthrough-return
2265  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
2266  %v = load <2 x i32>, <2 x i32>* %s
2267  %v2 = zext <2 x i32> %v to <2 x i64>
2268  ret <2 x i64> %v2
2269}
2270
2271define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
2272; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
2273; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2274; CHECK-NEXT:  # %bb.0:
2275; CHECK-NEXT:    local.get 0
2276; CHECK-NEXT:    i32.const -8
2277; CHECK-NEXT:    i32.add
2278; CHECK-NEXT:    i64x2.load32x2_u 0
2279; CHECK-NEXT:    # fallthrough-return
2280  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
2281  %v = load <2 x i32>, <2 x i32>* %s
2282  ret <2 x i32> %v
2283}
2284
2285define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
2286; CHECK-LABEL: load_v2i64_with_unfolded_offset:
2287; CHECK:         .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
2288; CHECK-NEXT:  # %bb.0:
2289; CHECK-NEXT:    local.get 0
2290; CHECK-NEXT:    i32.const 16
2291; CHECK-NEXT:    i32.add
2292; CHECK-NEXT:    v128.load 0
2293; CHECK-NEXT:    # fallthrough-return
2294  %q = ptrtoint <2 x i64>* %p to i32
2295  %r = add nsw i32 %q, 16
2296  %s = inttoptr i32 %r to <2 x i64>*
2297  %v = load <2 x i64>, <2 x i64>* %s
2298  ret <2 x i64> %v
2299}
2300
2301define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
2302; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
2303; CHECK:         .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
2304; CHECK-NEXT:  # %bb.0:
2305; CHECK-NEXT:    local.get 0
2306; CHECK-NEXT:    i32.const 16
2307; CHECK-NEXT:    i32.add
2308; CHECK-NEXT:    v64x2.load_splat 0
2309; CHECK-NEXT:    # fallthrough-return
2310  %q = ptrtoint i64* %p to i32
2311  %r = add nsw i32 %q, 16
2312  %s = inttoptr i32 %r to i64*
2313  %e = load i64, i64* %s
2314  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2315  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2316  ret <2 x i64> %v2
2317}
2318
2319define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
2320; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
2321; CHECK:         .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
2322; CHECK-NEXT:  # %bb.0:
2323; CHECK-NEXT:    local.get 0
2324; CHECK-NEXT:    i32.const 16
2325; CHECK-NEXT:    i32.add
2326; CHECK-NEXT:    i64x2.load32x2_s 0
2327; CHECK-NEXT:    # fallthrough-return
2328  %q = ptrtoint <2 x i32>* %p to i32
2329  %r = add nsw i32 %q, 16
2330  %s = inttoptr i32 %r to <2 x i32>*
2331  %v = load <2 x i32>, <2 x i32>* %s
2332  %v2 = sext <2 x i32> %v to <2 x i64>
2333  ret <2 x i64> %v2
2334}
2335
2336define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
2337; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
2338; CHECK:         .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
2339; CHECK-NEXT:  # %bb.0:
2340; CHECK-NEXT:    local.get 0
2341; CHECK-NEXT:    i32.const 16
2342; CHECK-NEXT:    i32.add
2343; CHECK-NEXT:    i64x2.load32x2_u 0
2344; CHECK-NEXT:    # fallthrough-return
2345  %q = ptrtoint <2 x i32>* %p to i32
2346  %r = add nsw i32 %q, 16
2347  %s = inttoptr i32 %r to <2 x i32>*
2348  %v = load <2 x i32>, <2 x i32>* %s
2349  %v2 = zext <2 x i32> %v to <2 x i64>
2350  ret <2 x i64> %v2
2351}
2352
2353define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
2354; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
2355; CHECK:         .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
2356; CHECK-NEXT:  # %bb.0:
2357; CHECK-NEXT:    local.get 0
2358; CHECK-NEXT:    i32.const 16
2359; CHECK-NEXT:    i32.add
2360; CHECK-NEXT:    i64x2.load32x2_u 0
2361; CHECK-NEXT:    # fallthrough-return
2362  %q = ptrtoint <2 x i32>* %p to i32
2363  %r = add nsw i32 %q, 16
2364  %s = inttoptr i32 %r to <2 x i32>*
2365  %v = load <2 x i32>, <2 x i32>* %s
2366  ret <2 x i32> %v
2367}
2368
2369define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
2370; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
2371; CHECK:         .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2372; CHECK-NEXT:  # %bb.0:
2373; CHECK-NEXT:    local.get 0
2374; CHECK-NEXT:    i32.const 16
2375; CHECK-NEXT:    i32.add
2376; CHECK-NEXT:    v128.load 0
2377; CHECK-NEXT:    # fallthrough-return
2378  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
2379  %v = load <2 x i64>, <2 x i64>* %s
2380  ret <2 x i64> %v
2381}
2382
2383define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
2384; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
2385; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2386; CHECK-NEXT:  # %bb.0:
2387; CHECK-NEXT:    local.get 0
2388; CHECK-NEXT:    i32.const 8
2389; CHECK-NEXT:    i32.add
2390; CHECK-NEXT:    v64x2.load_splat 0
2391; CHECK-NEXT:    # fallthrough-return
2392  %s = getelementptr i64, i64* %p, i32 1
2393  %e = load i64, i64* %s
2394  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2395  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2396  ret <2 x i64> %v2
2397}
2398
2399define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
2400; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
2401; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2402; CHECK-NEXT:  # %bb.0:
2403; CHECK-NEXT:    local.get 0
2404; CHECK-NEXT:    i32.const 8
2405; CHECK-NEXT:    i32.add
2406; CHECK-NEXT:    i64x2.load32x2_s 0
2407; CHECK-NEXT:    # fallthrough-return
2408  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
2409  %v = load <2 x i32>, <2 x i32>* %s
2410  %v2 = sext <2 x i32> %v to <2 x i64>
2411  ret <2 x i64> %v2
2412}
2413
2414define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
2415; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
2416; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2417; CHECK-NEXT:  # %bb.0:
2418; CHECK-NEXT:    local.get 0
2419; CHECK-NEXT:    i32.const 8
2420; CHECK-NEXT:    i32.add
2421; CHECK-NEXT:    i64x2.load32x2_u 0
2422; CHECK-NEXT:    # fallthrough-return
2423  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
2424  %v = load <2 x i32>, <2 x i32>* %s
2425  %v2 = zext <2 x i32> %v to <2 x i64>
2426  ret <2 x i64> %v2
2427}
2428
2429define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
2430; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
2431; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
2432; CHECK-NEXT:  # %bb.0:
2433; CHECK-NEXT:    local.get 0
2434; CHECK-NEXT:    i32.const 8
2435; CHECK-NEXT:    i32.add
2436; CHECK-NEXT:    i64x2.load32x2_u 0
2437; CHECK-NEXT:    # fallthrough-return
2438  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
2439  %v = load <2 x i32>, <2 x i32>* %s
2440  ret <2 x i32> %v
2441}
2442
2443define <2 x i64> @load_v2i64_from_numeric_address() {
2444; CHECK-LABEL: load_v2i64_from_numeric_address:
2445; CHECK:         .functype load_v2i64_from_numeric_address () -> (v128)
2446; CHECK-NEXT:  # %bb.0:
2447; CHECK-NEXT:    i32.const 0
2448; CHECK-NEXT:    v128.load 32
2449; CHECK-NEXT:    # fallthrough-return
2450  %s = inttoptr i32 32 to <2 x i64>*
2451  %v = load <2 x i64>, <2 x i64>* %s
2452  ret <2 x i64> %v
2453}
2454
2455define <2 x i64> @load_splat_v2i64_from_numeric_address() {
2456; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
2457; CHECK:         .functype load_splat_v2i64_from_numeric_address () -> (v128)
2458; CHECK-NEXT:  # %bb.0:
2459; CHECK-NEXT:    i32.const 0
2460; CHECK-NEXT:    v64x2.load_splat 32
2461; CHECK-NEXT:    # fallthrough-return
2462  %s = inttoptr i32 32 to i64*
2463  %e = load i64, i64* %s
2464  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2465  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2466  ret <2 x i64> %v2
2467}
2468
2469define <2 x i64> @load_sext_v2i64_from_numeric_address() {
2470; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
2471; CHECK:         .functype load_sext_v2i64_from_numeric_address () -> (v128)
2472; CHECK-NEXT:  # %bb.0:
2473; CHECK-NEXT:    i32.const 0
2474; CHECK-NEXT:    i64x2.load32x2_s 32
2475; CHECK-NEXT:    # fallthrough-return
2476  %s = inttoptr i32 32 to <2 x i32>*
2477  %v = load <2 x i32>, <2 x i32>* %s
2478  %v2 = sext <2 x i32> %v to <2 x i64>
2479  ret <2 x i64> %v2
2480}
2481
2482define <2 x i64> @load_zext_v2i64_from_numeric_address() {
2483; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
2484; CHECK:         .functype load_zext_v2i64_from_numeric_address () -> (v128)
2485; CHECK-NEXT:  # %bb.0:
2486; CHECK-NEXT:    i32.const 0
2487; CHECK-NEXT:    i64x2.load32x2_u 32
2488; CHECK-NEXT:    # fallthrough-return
2489  %s = inttoptr i32 32 to <2 x i32>*
2490  %v = load <2 x i32>, <2 x i32>* %s
2491  %v2 = zext <2 x i32> %v to <2 x i64>
2492  ret <2 x i64> %v2
2493}
2494
2495define <2 x i32> @load_ext_v2i64_from_numeric_address() {
2496; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
2497; CHECK:         .functype load_ext_v2i64_from_numeric_address () -> (v128)
2498; CHECK-NEXT:  # %bb.0:
2499; CHECK-NEXT:    i32.const 0
2500; CHECK-NEXT:    i64x2.load32x2_u 32
2501; CHECK-NEXT:    # fallthrough-return
2502  %s = inttoptr i32 32 to <2 x i32>*
2503  %v = load <2 x i32>, <2 x i32>* %s
2504  ret <2 x i32> %v
2505}
2506
2507@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
2508define <2 x i64> @load_v2i64_from_global_address() {
2509; CHECK-LABEL: load_v2i64_from_global_address:
2510; CHECK:         .functype load_v2i64_from_global_address () -> (v128)
2511; CHECK-NEXT:  # %bb.0:
2512; CHECK-NEXT:    i32.const 0
2513; CHECK-NEXT:    v128.load gv_v2i64
2514; CHECK-NEXT:    # fallthrough-return
2515  %v = load <2 x i64>, <2 x i64>* @gv_v2i64
2516  ret <2 x i64> %v
2517}
2518
2519@gv_i64 = global i64 42
2520define <2 x i64> @load_splat_v2i64_from_global_address() {
2521; CHECK-LABEL: load_splat_v2i64_from_global_address:
2522; CHECK:         .functype load_splat_v2i64_from_global_address () -> (v128)
2523; CHECK-NEXT:  # %bb.0:
2524; CHECK-NEXT:    i32.const 0
2525; CHECK-NEXT:    v64x2.load_splat gv_i64
2526; CHECK-NEXT:    # fallthrough-return
2527  %e = load i64, i64* @gv_i64
2528  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2529  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2530  ret <2 x i64> %v2
2531}
2532
2533@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
2534define <2 x i64> @load_sext_v2i64_from_global_address() {
2535; CHECK-LABEL: load_sext_v2i64_from_global_address:
2536; CHECK:         .functype load_sext_v2i64_from_global_address () -> (v128)
2537; CHECK-NEXT:  # %bb.0:
2538; CHECK-NEXT:    i32.const 0
2539; CHECK-NEXT:    i64x2.load32x2_s gv_v2i32
2540; CHECK-NEXT:    # fallthrough-return
2541  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2542  %v2 = sext <2 x i32> %v to <2 x i64>
2543  ret <2 x i64> %v2
2544}
2545
2546define <2 x i64> @load_zext_v2i64_from_global_address() {
2547; CHECK-LABEL: load_zext_v2i64_from_global_address:
2548; CHECK:         .functype load_zext_v2i64_from_global_address () -> (v128)
2549; CHECK-NEXT:  # %bb.0:
2550; CHECK-NEXT:    i32.const 0
2551; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
2552; CHECK-NEXT:    # fallthrough-return
2553  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2554  %v2 = zext <2 x i32> %v to <2 x i64>
2555  ret <2 x i64> %v2
2556}
2557
2558define <2 x i32> @load_ext_v2i64_from_global_address() {
2559; CHECK-LABEL: load_ext_v2i64_from_global_address:
2560; CHECK:         .functype load_ext_v2i64_from_global_address () -> (v128)
2561; CHECK-NEXT:  # %bb.0:
2562; CHECK-NEXT:    i32.const 0
2563; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
2564; CHECK-NEXT:    # fallthrough-return
2565  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2566  ret <2 x i32> %v
2567}
2568
2569define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
2570; CHECK-LABEL: store_v2i64:
2571; CHECK:         .functype store_v2i64 (v128, i32) -> ()
2572; CHECK-NEXT:  # %bb.0:
2573; CHECK-NEXT:    local.get 1
2574; CHECK-NEXT:    local.get 0
2575; CHECK-NEXT:    v128.store 0
2576; CHECK-NEXT:    # fallthrough-return
2577  store <2 x i64> %v , <2 x i64>* %p
2578  ret void
2579}
2580
2581define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
2582; CHECK-LABEL: store_v2i64_with_folded_offset:
2583; CHECK:         .functype store_v2i64_with_folded_offset (v128, i32) -> ()
2584; CHECK-NEXT:  # %bb.0:
2585; CHECK-NEXT:    local.get 1
2586; CHECK-NEXT:    local.get 0
2587; CHECK-NEXT:    v128.store 16
2588; CHECK-NEXT:    # fallthrough-return
2589  %q = ptrtoint <2 x i64>* %p to i32
2590  %r = add nuw i32 %q, 16
2591  %s = inttoptr i32 %r to <2 x i64>*
2592  store <2 x i64> %v , <2 x i64>* %s
2593  ret void
2594}
2595
2596define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
2597; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
2598; CHECK:         .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
2599; CHECK-NEXT:  # %bb.0:
2600; CHECK-NEXT:    local.get 1
2601; CHECK-NEXT:    local.get 0
2602; CHECK-NEXT:    v128.store 16
2603; CHECK-NEXT:    # fallthrough-return
2604  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
2605  store <2 x i64> %v , <2 x i64>* %s
2606  ret void
2607}
2608
2609define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
2610; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
2611; CHECK:         .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
2612; CHECK-NEXT:  # %bb.0:
2613; CHECK-NEXT:    local.get 1
2614; CHECK-NEXT:    i32.const -16
2615; CHECK-NEXT:    i32.add
2616; CHECK-NEXT:    local.get 0
2617; CHECK-NEXT:    v128.store 0
2618; CHECK-NEXT:    # fallthrough-return
2619  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
2620  store <2 x i64> %v , <2 x i64>* %s
2621  ret void
2622}
2623
2624define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
2625; CHECK-LABEL: store_v2i64_with_unfolded_offset:
2626; CHECK:         .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
2627; CHECK-NEXT:  # %bb.0:
2628; CHECK-NEXT:    local.get 1
2629; CHECK-NEXT:    i32.const 16
2630; CHECK-NEXT:    i32.add
2631; CHECK-NEXT:    local.get 0
2632; CHECK-NEXT:    v128.store 0
2633; CHECK-NEXT:    # fallthrough-return
2634  %q = ptrtoint <2 x i64>* %p to i32
2635  %r = add nsw i32 %q, 16
2636  %s = inttoptr i32 %r to <2 x i64>*
2637  store <2 x i64> %v , <2 x i64>* %s
2638  ret void
2639}
2640
2641define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
2642; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
2643; CHECK:         .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
2644; CHECK-NEXT:  # %bb.0:
2645; CHECK-NEXT:    local.get 1
2646; CHECK-NEXT:    i32.const 16
2647; CHECK-NEXT:    i32.add
2648; CHECK-NEXT:    local.get 0
2649; CHECK-NEXT:    v128.store 0
2650; CHECK-NEXT:    # fallthrough-return
2651  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
2652  store <2 x i64> %v , <2 x i64>* %s
2653  ret void
2654}
2655
2656define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
2657; CHECK-LABEL: store_v2i64_to_numeric_address:
2658; CHECK:         .functype store_v2i64_to_numeric_address (v128) -> ()
2659; CHECK-NEXT:  # %bb.0:
2660; CHECK-NEXT:    i32.const 0
2661; CHECK-NEXT:    local.get 0
2662; CHECK-NEXT:    v128.store 32
2663; CHECK-NEXT:    # fallthrough-return
2664  %s = inttoptr i32 32 to <2 x i64>*
2665  store <2 x i64> %v , <2 x i64>* %s
2666  ret void
2667}
2668
2669define void @store_v2i64_to_global_address(<2 x i64> %v) {
2670; CHECK-LABEL: store_v2i64_to_global_address:
2671; CHECK:         .functype store_v2i64_to_global_address (v128) -> ()
2672; CHECK-NEXT:  # %bb.0:
2673; CHECK-NEXT:    i32.const 0
2674; CHECK-NEXT:    local.get 0
2675; CHECK-NEXT:    v128.store gv_v2i64
2676; CHECK-NEXT:    # fallthrough-return
2677  store <2 x i64> %v , <2 x i64>* @gv_v2i64
2678  ret void
2679}
2680
2681; ==============================================================================
2682; 4 x float
2683; ==============================================================================
2684define <4 x float> @load_v4f32(<4 x float>* %p) {
2685; CHECK-LABEL: load_v4f32:
2686; CHECK:         .functype load_v4f32 (i32) -> (v128)
2687; CHECK-NEXT:  # %bb.0:
2688; CHECK-NEXT:    local.get 0
2689; CHECK-NEXT:    v128.load 0
2690; CHECK-NEXT:    # fallthrough-return
2691  %v = load <4 x float>, <4 x float>* %p
2692  ret <4 x float> %v
2693}
2694
2695define <4 x float> @load_splat_v4f32(float* %p) {
2696; CHECK-LABEL: load_splat_v4f32:
2697; CHECK:         .functype load_splat_v4f32 (i32) -> (v128)
2698; CHECK-NEXT:  # %bb.0:
2699; CHECK-NEXT:    local.get 0
2700; CHECK-NEXT:    v32x4.load_splat 0
2701; CHECK-NEXT:    # fallthrough-return
2702  %e = load float, float* %p
2703  %v1 = insertelement <4 x float> undef, float %e, i32 0
2704  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2705  ret <4 x float> %v2
2706}
2707
2708define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
2709; CHECK-LABEL: load_v4f32_with_folded_offset:
2710; CHECK:         .functype load_v4f32_with_folded_offset (i32) -> (v128)
2711; CHECK-NEXT:  # %bb.0:
2712; CHECK-NEXT:    local.get 0
2713; CHECK-NEXT:    v128.load 16
2714; CHECK-NEXT:    # fallthrough-return
2715  %q = ptrtoint <4 x float>* %p to i32
2716  %r = add nuw i32 %q, 16
2717  %s = inttoptr i32 %r to <4 x float>*
2718  %v = load <4 x float>, <4 x float>* %s
2719  ret <4 x float> %v
2720}
2721
2722define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
2723; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
2724; CHECK:         .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
2725; CHECK-NEXT:  # %bb.0:
2726; CHECK-NEXT:    local.get 0
2727; CHECK-NEXT:    v32x4.load_splat 16
2728; CHECK-NEXT:    # fallthrough-return
2729  %q = ptrtoint float* %p to i32
2730  %r = add nuw i32 %q, 16
2731  %s = inttoptr i32 %r to float*
2732  %e = load float, float* %s
2733  %v1 = insertelement <4 x float> undef, float %e, i32 0
2734  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2735  ret <4 x float> %v2
2736}
2737
2738define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
2739; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
2740; CHECK:         .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
2741; CHECK-NEXT:  # %bb.0:
2742; CHECK-NEXT:    local.get 0
2743; CHECK-NEXT:    v128.load 16
2744; CHECK-NEXT:    # fallthrough-return
2745  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
2746  %v = load <4 x float>, <4 x float>* %s
2747  ret <4 x float> %v
2748}
2749
2750define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
2751; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
2752; CHECK:         .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
2753; CHECK-NEXT:  # %bb.0:
2754; CHECK-NEXT:    local.get 0
2755; CHECK-NEXT:    v32x4.load_splat 4
2756; CHECK-NEXT:    # fallthrough-return
2757  %s = getelementptr inbounds float, float* %p, i32 1
2758  %e = load float, float* %s
2759  %v1 = insertelement <4 x float> undef, float %e, i32 0
2760  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2761  ret <4 x float> %v2
2762}
2763
2764define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
2765; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
2766; CHECK:         .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
2767; CHECK-NEXT:  # %bb.0:
2768; CHECK-NEXT:    local.get 0
2769; CHECK-NEXT:    i32.const -16
2770; CHECK-NEXT:    i32.add
2771; CHECK-NEXT:    v128.load 0
2772; CHECK-NEXT:    # fallthrough-return
2773  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
2774  %v = load <4 x float>, <4 x float>* %s
2775  ret <4 x float> %v
2776}
2777
2778define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
2779; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
2780; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
2781; CHECK-NEXT:  # %bb.0:
2782; CHECK-NEXT:    local.get 0
2783; CHECK-NEXT:    i32.const -4
2784; CHECK-NEXT:    i32.add
2785; CHECK-NEXT:    v32x4.load_splat 0
2786; CHECK-NEXT:    # fallthrough-return
2787  %s = getelementptr inbounds float, float* %p, i32 -1
2788  %e = load float, float* %s
2789  %v1 = insertelement <4 x float> undef, float %e, i32 0
2790  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2791  ret <4 x float> %v2
2792}
2793
2794define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
2795; CHECK-LABEL: load_v4f32_with_unfolded_offset:
2796; CHECK:         .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
2797; CHECK-NEXT:  # %bb.0:
2798; CHECK-NEXT:    local.get 0
2799; CHECK-NEXT:    i32.const 16
2800; CHECK-NEXT:    i32.add
2801; CHECK-NEXT:    v128.load 0
2802; CHECK-NEXT:    # fallthrough-return
2803  %q = ptrtoint <4 x float>* %p to i32
2804  %r = add nsw i32 %q, 16
2805  %s = inttoptr i32 %r to <4 x float>*
2806  %v = load <4 x float>, <4 x float>* %s
2807  ret <4 x float> %v
2808}
2809
2810define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
2811; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
2812; CHECK:         .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
2813; CHECK-NEXT:  # %bb.0:
2814; CHECK-NEXT:    local.get 0
2815; CHECK-NEXT:    i32.const 16
2816; CHECK-NEXT:    i32.add
2817; CHECK-NEXT:    v32x4.load_splat 0
2818; CHECK-NEXT:    # fallthrough-return
2819  %q = ptrtoint float* %p to i32
2820  %r = add nsw i32 %q, 16
2821  %s = inttoptr i32 %r to float*
2822  %e = load float, float* %s
2823  %v1 = insertelement <4 x float> undef, float %e, i32 0
2824  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2825  ret <4 x float> %v2
2826}
2827
2828define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
2829; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
2830; CHECK:         .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
2831; CHECK-NEXT:  # %bb.0:
2832; CHECK-NEXT:    local.get 0
2833; CHECK-NEXT:    i32.const 16
2834; CHECK-NEXT:    i32.add
2835; CHECK-NEXT:    v128.load 0
2836; CHECK-NEXT:    # fallthrough-return
2837  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
2838  %v = load <4 x float>, <4 x float>* %s
2839  ret <4 x float> %v
2840}
2841
2842define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
2843; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
2844; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
2845; CHECK-NEXT:  # %bb.0:
2846; CHECK-NEXT:    local.get 0
2847; CHECK-NEXT:    i32.const 4
2848; CHECK-NEXT:    i32.add
2849; CHECK-NEXT:    v32x4.load_splat 0
2850; CHECK-NEXT:    # fallthrough-return
2851  %s = getelementptr float, float* %p, i32 1
2852  %e = load float, float* %s
2853  %v1 = insertelement <4 x float> undef, float %e, i32 0
2854  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2855  ret <4 x float> %v2
2856}
2857
2858define <4 x float> @load_v4f32_from_numeric_address() {
2859; CHECK-LABEL: load_v4f32_from_numeric_address:
2860; CHECK:         .functype load_v4f32_from_numeric_address () -> (v128)
2861; CHECK-NEXT:  # %bb.0:
2862; CHECK-NEXT:    i32.const 0
2863; CHECK-NEXT:    v128.load 32
2864; CHECK-NEXT:    # fallthrough-return
2865  %s = inttoptr i32 32 to <4 x float>*
2866  %v = load <4 x float>, <4 x float>* %s
2867  ret <4 x float> %v
2868}
2869
2870define <4 x float> @load_splat_v4f32_from_numeric_address() {
2871; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
2872; CHECK:         .functype load_splat_v4f32_from_numeric_address () -> (v128)
2873; CHECK-NEXT:  # %bb.0:
2874; CHECK-NEXT:    i32.const 0
2875; CHECK-NEXT:    v32x4.load_splat 32
2876; CHECK-NEXT:    # fallthrough-return
2877  %s = inttoptr i32 32 to float*
2878  %e = load float, float* %s
2879  %v1 = insertelement <4 x float> undef, float %e, i32 0
2880  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2881  ret <4 x float> %v2
2882}
2883
2884@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
2885define <4 x float> @load_v4f32_from_global_address() {
2886; CHECK-LABEL: load_v4f32_from_global_address:
2887; CHECK:         .functype load_v4f32_from_global_address () -> (v128)
2888; CHECK-NEXT:  # %bb.0:
2889; CHECK-NEXT:    i32.const 0
2890; CHECK-NEXT:    v128.load gv_v4f32
2891; CHECK-NEXT:    # fallthrough-return
2892  %v = load <4 x float>, <4 x float>* @gv_v4f32
2893  ret <4 x float> %v
2894}
2895
2896@gv_f32 = global float 42.
2897define <4 x float> @load_splat_v4f32_from_global_address() {
2898; CHECK-LABEL: load_splat_v4f32_from_global_address:
2899; CHECK:         .functype load_splat_v4f32_from_global_address () -> (v128)
2900; CHECK-NEXT:  # %bb.0:
2901; CHECK-NEXT:    i32.const 0
2902; CHECK-NEXT:    v32x4.load_splat gv_f32
2903; CHECK-NEXT:    # fallthrough-return
2904  %e = load float, float* @gv_f32
2905  %v1 = insertelement <4 x float> undef, float %e, i32 0
2906  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2907  ret <4 x float> %v2
2908}
2909
2910define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
2911; CHECK-LABEL: store_v4f32:
2912; CHECK:         .functype store_v4f32 (v128, i32) -> ()
2913; CHECK-NEXT:  # %bb.0:
2914; CHECK-NEXT:    local.get 1
2915; CHECK-NEXT:    local.get 0
2916; CHECK-NEXT:    v128.store 0
2917; CHECK-NEXT:    # fallthrough-return
2918  store <4 x float> %v , <4 x float>* %p
2919  ret void
2920}
2921
2922define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
2923; CHECK-LABEL: store_v4f32_with_folded_offset:
2924; CHECK:         .functype store_v4f32_with_folded_offset (v128, i32) -> ()
2925; CHECK-NEXT:  # %bb.0:
2926; CHECK-NEXT:    local.get 1
2927; CHECK-NEXT:    local.get 0
2928; CHECK-NEXT:    v128.store 16
2929; CHECK-NEXT:    # fallthrough-return
2930  %q = ptrtoint <4 x float>* %p to i32
2931  %r = add nuw i32 %q, 16
2932  %s = inttoptr i32 %r to <4 x float>*
2933  store <4 x float> %v , <4 x float>* %s
2934  ret void
2935}
2936
2937define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
2938; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
2939; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
2940; CHECK-NEXT:  # %bb.0:
2941; CHECK-NEXT:    local.get 1
2942; CHECK-NEXT:    local.get 0
2943; CHECK-NEXT:    v128.store 16
2944; CHECK-NEXT:    # fallthrough-return
2945  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
2946  store <4 x float> %v , <4 x float>* %s
2947  ret void
2948}
2949
2950define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
2951; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
2952; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
2953; CHECK-NEXT:  # %bb.0:
2954; CHECK-NEXT:    local.get 1
2955; CHECK-NEXT:    i32.const -16
2956; CHECK-NEXT:    i32.add
2957; CHECK-NEXT:    local.get 0
2958; CHECK-NEXT:    v128.store 0
2959; CHECK-NEXT:    # fallthrough-return
2960  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
2961  store <4 x float> %v , <4 x float>* %s
2962  ret void
2963}
2964
2965define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
2966; CHECK-LABEL: store_v4f32_with_unfolded_offset:
2967; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
2968; CHECK-NEXT:  # %bb.0:
2969; CHECK-NEXT:    local.get 1
2970; CHECK-NEXT:    i32.const 16
2971; CHECK-NEXT:    i32.add
2972; CHECK-NEXT:    local.get 0
2973; CHECK-NEXT:    v128.store 0
2974; CHECK-NEXT:    # fallthrough-return
2975  %q = ptrtoint <4 x float>* %p to i32
2976  %r = add nsw i32 %q, 16
2977  %s = inttoptr i32 %r to <4 x float>*
2978  store <4 x float> %v , <4 x float>* %s
2979  ret void
2980}
2981
2982define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
2983; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
2984; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
2985; CHECK-NEXT:  # %bb.0:
2986; CHECK-NEXT:    local.get 1
2987; CHECK-NEXT:    i32.const 16
2988; CHECK-NEXT:    i32.add
2989; CHECK-NEXT:    local.get 0
2990; CHECK-NEXT:    v128.store 0
2991; CHECK-NEXT:    # fallthrough-return
2992  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
2993  store <4 x float> %v , <4 x float>* %s
2994  ret void
2995}
2996
2997define void @store_v4f32_to_numeric_address(<4 x float> %v) {
2998; CHECK-LABEL: store_v4f32_to_numeric_address:
2999; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
3000; CHECK-NEXT:  # %bb.0:
3001; CHECK-NEXT:    i32.const 0
3002; CHECK-NEXT:    local.get 0
3003; CHECK-NEXT:    v128.store 32
3004; CHECK-NEXT:    # fallthrough-return
3005  %s = inttoptr i32 32 to <4 x float>*
3006  store <4 x float> %v , <4 x float>* %s
3007  ret void
3008}
3009
3010define void @store_v4f32_to_global_address(<4 x float> %v) {
3011; CHECK-LABEL: store_v4f32_to_global_address:
3012; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
3013; CHECK-NEXT:  # %bb.0:
3014; CHECK-NEXT:    i32.const 0
3015; CHECK-NEXT:    local.get 0
3016; CHECK-NEXT:    v128.store gv_v4f32
3017; CHECK-NEXT:    # fallthrough-return
3018  store <4 x float> %v , <4 x float>* @gv_v4f32
3019  ret void
3020}
3021
3022; ==============================================================================
3023; 2 x double
3024; ==============================================================================
3025define <2 x double> @load_v2f64(<2 x double>* %p) {
3026; CHECK-LABEL: load_v2f64:
3027; CHECK:         .functype load_v2f64 (i32) -> (v128)
3028; CHECK-NEXT:  # %bb.0:
3029; CHECK-NEXT:    local.get 0
3030; CHECK-NEXT:    v128.load 0
3031; CHECK-NEXT:    # fallthrough-return
3032  %v = load <2 x double>, <2 x double>* %p
3033  ret <2 x double> %v
3034}
3035
3036define <2 x double> @load_splat_v2f64(double* %p) {
3037; CHECK-LABEL: load_splat_v2f64:
3038; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
3039; CHECK-NEXT:  # %bb.0:
3040; CHECK-NEXT:    local.get 0
3041; CHECK-NEXT:    v64x2.load_splat 0
3042; CHECK-NEXT:    # fallthrough-return
3043  %e = load double, double* %p
3044  %v1 = insertelement <2 x double> undef, double %e, i32 0
3045  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3046  ret <2 x double> %v2
3047}
3048
3049define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
3050; CHECK-LABEL: load_v2f64_with_folded_offset:
3051; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
3052; CHECK-NEXT:  # %bb.0:
3053; CHECK-NEXT:    local.get 0
3054; CHECK-NEXT:    v128.load 16
3055; CHECK-NEXT:    # fallthrough-return
3056  %q = ptrtoint <2 x double>* %p to i32
3057  %r = add nuw i32 %q, 16
3058  %s = inttoptr i32 %r to <2 x double>*
3059  %v = load <2 x double>, <2 x double>* %s
3060  ret <2 x double> %v
3061}
3062
3063define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
3064; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
3065; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
3066; CHECK-NEXT:  # %bb.0:
3067; CHECK-NEXT:    local.get 0
3068; CHECK-NEXT:    v64x2.load_splat 16
3069; CHECK-NEXT:    # fallthrough-return
3070  %q = ptrtoint double* %p to i32
3071  %r = add nuw i32 %q, 16
3072  %s = inttoptr i32 %r to double*
3073  %e = load double, double* %s
3074  %v1 = insertelement <2 x double> undef, double %e, i32 0
3075  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3076  ret <2 x double> %v2
3077}
3078
3079define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
3080; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
3081; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
3082; CHECK-NEXT:  # %bb.0:
3083; CHECK-NEXT:    local.get 0
3084; CHECK-NEXT:    v128.load 16
3085; CHECK-NEXT:    # fallthrough-return
3086  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
3087  %v = load <2 x double>, <2 x double>* %s
3088  ret <2 x double> %v
3089}
3090
3091define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
3092; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
3093; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
3094; CHECK-NEXT:  # %bb.0:
3095; CHECK-NEXT:    local.get 0
3096; CHECK-NEXT:    v64x2.load_splat 8
3097; CHECK-NEXT:    # fallthrough-return
3098  %s = getelementptr inbounds double, double* %p, i32 1
3099  %e = load double, double* %s
3100  %v1 = insertelement <2 x double> undef, double %e, i32 0
3101  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3102  ret <2 x double> %v2
3103}
3104
3105define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
3106; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
3107; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
3108; CHECK-NEXT:  # %bb.0:
3109; CHECK-NEXT:    local.get 0
3110; CHECK-NEXT:    i32.const -16
3111; CHECK-NEXT:    i32.add
3112; CHECK-NEXT:    v128.load 0
3113; CHECK-NEXT:    # fallthrough-return
3114  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
3115  %v = load <2 x double>, <2 x double>* %s
3116  ret <2 x double> %v
3117}
3118
3119define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
3120; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
3121; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
3122; CHECK-NEXT:  # %bb.0:
3123; CHECK-NEXT:    local.get 0
3124; CHECK-NEXT:    i32.const -8
3125; CHECK-NEXT:    i32.add
3126; CHECK-NEXT:    v64x2.load_splat 0
3127; CHECK-NEXT:    # fallthrough-return
3128  %s = getelementptr inbounds double, double* %p, i32 -1
3129  %e = load double, double* %s
3130  %v1 = insertelement <2 x double> undef, double %e, i32 0
3131  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3132  ret <2 x double> %v2
3133}
3134
3135define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
3136; CHECK-LABEL: load_v2f64_with_unfolded_offset:
3137; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
3138; CHECK-NEXT:  # %bb.0:
3139; CHECK-NEXT:    local.get 0
3140; CHECK-NEXT:    i32.const 16
3141; CHECK-NEXT:    i32.add
3142; CHECK-NEXT:    v128.load 0
3143; CHECK-NEXT:    # fallthrough-return
3144  %q = ptrtoint <2 x double>* %p to i32
3145  %r = add nsw i32 %q, 16
3146  %s = inttoptr i32 %r to <2 x double>*
3147  %v = load <2 x double>, <2 x double>* %s
3148  ret <2 x double> %v
3149}
3150
3151define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
3152; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
3153; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
3154; CHECK-NEXT:  # %bb.0:
3155; CHECK-NEXT:    local.get 0
3156; CHECK-NEXT:    i32.const 16
3157; CHECK-NEXT:    i32.add
3158; CHECK-NEXT:    v64x2.load_splat 0
3159; CHECK-NEXT:    # fallthrough-return
3160  %q = ptrtoint double* %p to i32
3161  %r = add nsw i32 %q, 16
3162  %s = inttoptr i32 %r to double*
3163  %e = load double, double* %s
3164  %v1 = insertelement <2 x double> undef, double %e, i32 0
3165  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3166  ret <2 x double> %v2
3167}
3168
3169define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
3170; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
3171; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
3172; CHECK-NEXT:  # %bb.0:
3173; CHECK-NEXT:    local.get 0
3174; CHECK-NEXT:    i32.const 16
3175; CHECK-NEXT:    i32.add
3176; CHECK-NEXT:    v128.load 0
3177; CHECK-NEXT:    # fallthrough-return
3178  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
3179  %v = load <2 x double>, <2 x double>* %s
3180  ret <2 x double> %v
3181}
3182
3183define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
3184; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
3185; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
3186; CHECK-NEXT:  # %bb.0:
3187; CHECK-NEXT:    local.get 0
3188; CHECK-NEXT:    i32.const 8
3189; CHECK-NEXT:    i32.add
3190; CHECK-NEXT:    v64x2.load_splat 0
3191; CHECK-NEXT:    # fallthrough-return
3192  %s = getelementptr double, double* %p, i32 1
3193  %e = load double, double* %s
3194  %v1 = insertelement <2 x double> undef, double %e, i32 0
3195  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3196  ret <2 x double> %v2
3197}
3198
3199define <2 x double> @load_v2f64_from_numeric_address() {
3200; CHECK-LABEL: load_v2f64_from_numeric_address:
3201; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
3202; CHECK-NEXT:  # %bb.0:
3203; CHECK-NEXT:    i32.const 0
3204; CHECK-NEXT:    v128.load 32
3205; CHECK-NEXT:    # fallthrough-return
3206  %s = inttoptr i32 32 to <2 x double>*
3207  %v = load <2 x double>, <2 x double>* %s
3208  ret <2 x double> %v
3209}
3210
3211define <2 x double> @load_splat_v2f64_from_numeric_address() {
3212; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
3213; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
3214; CHECK-NEXT:  # %bb.0:
3215; CHECK-NEXT:    i32.const 0
3216; CHECK-NEXT:    v64x2.load_splat 32
3217; CHECK-NEXT:    # fallthrough-return
3218  %s = inttoptr i32 32 to double*
3219  %e = load double, double* %s
3220  %v1 = insertelement <2 x double> undef, double %e, i32 0
3221  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3222  ret <2 x double> %v2
3223}
3224
3225@gv_v2f64 = global <2 x double> <double 42., double 42.>
3226define <2 x double> @load_v2f64_from_global_address() {
3227; CHECK-LABEL: load_v2f64_from_global_address:
3228; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
3229; CHECK-NEXT:  # %bb.0:
3230; CHECK-NEXT:    i32.const 0
3231; CHECK-NEXT:    v128.load gv_v2f64
3232; CHECK-NEXT:    # fallthrough-return
3233  %v = load <2 x double>, <2 x double>* @gv_v2f64
3234  ret <2 x double> %v
3235}
3236
3237@gv_f64 = global double 42.
3238define <2 x double> @load_splat_v2f64_from_global_address() {
3239; CHECK-LABEL: load_splat_v2f64_from_global_address:
3240; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
3241; CHECK-NEXT:  # %bb.0:
3242; CHECK-NEXT:    i32.const 0
3243; CHECK-NEXT:    v64x2.load_splat gv_f64
3244; CHECK-NEXT:    # fallthrough-return
3245  %e = load double, double* @gv_f64
3246  %v1 = insertelement <2 x double> undef, double %e, i32 0
3247  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
3248  ret <2 x double> %v2
3249}
3250
3251define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
3252; CHECK-LABEL: store_v2f64:
3253; CHECK:         .functype store_v2f64 (v128, i32) -> ()
3254; CHECK-NEXT:  # %bb.0:
3255; CHECK-NEXT:    local.get 1
3256; CHECK-NEXT:    local.get 0
3257; CHECK-NEXT:    v128.store 0
3258; CHECK-NEXT:    # fallthrough-return
3259  store <2 x double> %v , <2 x double>* %p
3260  ret void
3261}
3262
3263define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
3264; CHECK-LABEL: store_v2f64_with_folded_offset:
3265; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
3266; CHECK-NEXT:  # %bb.0:
3267; CHECK-NEXT:    local.get 1
3268; CHECK-NEXT:    local.get 0
3269; CHECK-NEXT:    v128.store 16
3270; CHECK-NEXT:    # fallthrough-return
3271  %q = ptrtoint <2 x double>* %p to i32
3272  %r = add nuw i32 %q, 16
3273  %s = inttoptr i32 %r to <2 x double>*
3274  store <2 x double> %v , <2 x double>* %s
3275  ret void
3276}
3277
3278define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
3279; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
3280; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
3281; CHECK-NEXT:  # %bb.0:
3282; CHECK-NEXT:    local.get 1
3283; CHECK-NEXT:    local.get 0
3284; CHECK-NEXT:    v128.store 16
3285; CHECK-NEXT:    # fallthrough-return
3286  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
3287  store <2 x double> %v , <2 x double>* %s
3288  ret void
3289}
3290
3291define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
3292; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
3293; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
3294; CHECK-NEXT:  # %bb.0:
3295; CHECK-NEXT:    local.get 1
3296; CHECK-NEXT:    i32.const -16
3297; CHECK-NEXT:    i32.add
3298; CHECK-NEXT:    local.get 0
3299; CHECK-NEXT:    v128.store 0
3300; CHECK-NEXT:    # fallthrough-return
3301  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
3302  store <2 x double> %v , <2 x double>* %s
3303  ret void
3304}
3305
3306define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
3307; CHECK-LABEL: store_v2f64_with_unfolded_offset:
3308; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
3309; CHECK-NEXT:  # %bb.0:
3310; CHECK-NEXT:    local.get 1
3311; CHECK-NEXT:    i32.const 16
3312; CHECK-NEXT:    i32.add
3313; CHECK-NEXT:    local.get 0
3314; CHECK-NEXT:    v128.store 0
3315; CHECK-NEXT:    # fallthrough-return
3316  %q = ptrtoint <2 x double>* %p to i32
3317  %r = add nsw i32 %q, 16
3318  %s = inttoptr i32 %r to <2 x double>*
3319  store <2 x double> %v , <2 x double>* %s
3320  ret void
3321}
3322
3323define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
3324; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
3325; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
3326; CHECK-NEXT:  # %bb.0:
3327; CHECK-NEXT:    local.get 1
3328; CHECK-NEXT:    i32.const 16
3329; CHECK-NEXT:    i32.add
3330; CHECK-NEXT:    local.get 0
3331; CHECK-NEXT:    v128.store 0
3332; CHECK-NEXT:    # fallthrough-return
3333  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
3334  store <2 x double> %v , <2 x double>* %s
3335  ret void
3336}
3337
3338define void @store_v2f64_to_numeric_address(<2 x double> %v) {
3339; CHECK-LABEL: store_v2f64_to_numeric_address:
3340; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
3341; CHECK-NEXT:  # %bb.0:
3342; CHECK-NEXT:    i32.const 0
3343; CHECK-NEXT:    local.get 0
3344; CHECK-NEXT:    v128.store 32
3345; CHECK-NEXT:    # fallthrough-return
3346  %s = inttoptr i32 32 to <2 x double>*
3347  store <2 x double> %v , <2 x double>* %s
3348  ret void
3349}
3350
3351define void @store_v2f64_to_global_address(<2 x double> %v) {
3352; CHECK-LABEL: store_v2f64_to_global_address:
3353; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
3354; CHECK-NEXT:  # %bb.0:
3355; CHECK-NEXT:    i32.const 0
3356; CHECK-NEXT:    local.get 0
3357; CHECK-NEXT:    v128.store gv_v2f64
3358; CHECK-NEXT:    # fallthrough-return
3359  store <2 x double> %v , <2 x double>* @gv_v2f64
3360  ret void
3361}
3362