1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
3
4; Test SIMD loads and stores
5
6target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
7target triple = "wasm32-unknown-unknown"
8
9; ==============================================================================
10; 16 x i8
11; ==============================================================================
; Plain 16 x i8 vector load: selects v128.load with offset immediate 0.
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %v
}

; A scalar i8 load splatted to every lane selects v8x16.load_splat.
define <16 x i8> @load_splat_v16i8(i8* %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; A constant added with `nuw` is folded into the load's offset immediate.
define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; Same `nuw` folding applies to the splatting load.
define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; An `inbounds` GEP with a constant positive index folds into the offset
; immediate (1 vector element = 16 bytes).
define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; `inbounds` GEP folding for the splatting load (1 i8 element = offset 1).
define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v8x16.load_splat 1
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; Negative offsets cannot be folded (offset immediates are unsigned), so an
; explicit i32.add is emitted instead.
define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; Negative GEP offset stays unfolded for the splatting load as well.
define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
121
; `add nsw` (no `nuw`) does not guarantee the unsigned wraparound semantics the
; offset immediate needs, so the add is kept explicit.
define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; `nsw`-only offset stays unfolded for the splatting load too.
define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; A GEP without `inbounds` is not folded into the offset immediate.
define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; Non-`inbounds` GEP stays unfolded for the splatting load as well.
define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v8x16.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 1
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

; A constant absolute address becomes the offset immediate on top of a zero
; base address.
define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  %v = load <16 x i8>, <16 x i8>* %s
  ret <16 x i8> %v
}

; Constant absolute address folding for the splatting load.
define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v8x16.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i8*
  %e = load i8, i8* %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
; A global's symbol is used directly as the load's offset immediate.
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %v
}

@gv_i8 = global i8 42
; Global-symbol offset folding for the splatting load.
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v8x16.load_splat gv_i8
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, i8* @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}
237
; Plain 16 x i8 vector store: selects v128.store with offset immediate 0.
define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* %p
  ret void
}

; A constant added with `nuw` folds into the store's offset immediate.
define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; An `inbounds` GEP with a constant positive index folds into the offset.
define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; Negative GEP offsets are not folded; an explicit i32.add is emitted.
define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; `add nsw` (no `nuw`) is not folded into the store's offset immediate.
define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; A GEP without `inbounds` is not folded into the store's offset immediate.
define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; A constant absolute address becomes the store's offset immediate.
define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}

; A global's symbol is used directly as the store's offset immediate.
define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , <16 x i8>* @gv_v16i8
  ret void
}
349
350; ==============================================================================
351; 8 x i16
352; ==============================================================================
; Plain 8 x i16 vector load: selects v128.load with offset immediate 0.
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16:
; CHECK:         .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %v
}

; A scalar i16 load splatted to every lane selects v16x8.load_splat.
define <8 x i16> @load_splat_v8i16(i16* %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; load <8 x i8> + sext combines into the widening load i16x8.load8x8_s.
define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; load <8 x i8> + zext combines into the widening load i16x8.load8x8_u.
define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; A bare <8 x i8> load (returned without an explicit ext) is lowered with the
; zero-extending widening load.
define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %v
}
411
; A constant added with `nuw` folds into the load's offset immediate.
define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; `nuw` offset folding for the splatting load.
define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; `nuw` offset folding for the sign-extending widening load.
define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; `nuw` offset folding for the zero-extending widening load.
define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; `nuw` offset folding for the bare <8 x i8> (implicitly extending) load.
define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

; `inbounds` GEP folding (1 vector element = 16 bytes).
define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; `inbounds` GEP folding for the splatting load (1 i16 element = offset 2).
define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v16x8.load_splat 2
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; `inbounds` GEP folding for the sign-extending load (1 <8 x i8> = 8 bytes).
define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; `inbounds` GEP folding for the zero-extending load.
define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; `inbounds` GEP folding for the bare <8 x i8> load.
define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
549
; Negative offsets cannot be folded (offset immediates are unsigned), so an
; explicit i32.add is emitted instead.
define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; Negative GEP offset stays unfolded for the splatting load.
define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Negative GEP offset stays unfolded for the sign-extending load.
define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Negative GEP offset stays unfolded for the zero-extending load.
define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Negative GEP offset stays unfolded for the bare <8 x i8> load.
define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

; `add nsw` (no `nuw`) is not folded into the offset immediate.
define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; `nsw`-only offset stays unfolded for the splatting load.
define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; `nsw`-only offset stays unfolded for the sign-extending load.
define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; `nsw`-only offset stays unfolded for the zero-extending load.
define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; `nsw`-only offset stays unfolded for the bare <8 x i8> load.
define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
707
; A GEP without `inbounds` is not folded into the offset immediate.
define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; Non-`inbounds` GEP stays unfolded for the splatting load.
define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v16x8.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 1
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Non-`inbounds` GEP stays unfolded for the sign-extending load.
define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Non-`inbounds` GEP stays unfolded for the zero-extending load.
define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Non-`inbounds` GEP stays unfolded for the bare <8 x i8> load.
define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}

; A constant absolute address becomes the offset immediate on a zero base.
define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}

; Constant absolute address folding for the splatting load.
define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v16x8.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Constant absolute address folding for the sign-extending load.
define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Constant absolute address folding for the zero-extending load.
define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Constant absolute address folding for the bare <8 x i8> load.
define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
845
; Loads from globals: the global's symbol is used directly as the offset
; operand of the load, with i32.const 0 as the base.
@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %v
}

; Splat of a scalar global: v16x8.load_splat with the symbol as the offset.
@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v16x8.load_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, i16* @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

; Sign-extending load of a narrow vector global via i16x8.load8x8_s.
@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Zero-extending load of the same narrow vector global via i16x8.load8x8_u.
define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

; Bare <8 x i8> load of the global (result returned at the narrow type)
; still selects the extending load8x8_u.
define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %v
}
907
908
; Basic v128 store: pointer then value are pushed, v128.store with offset 0.
define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* %p
  ret void
}

; An `add nuw` of 16 to the pointer is folded into the store's offset
; immediate (compare the nsw case below, which is not folded).
define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; An `inbounds` gep with a positive index is likewise folded into the offset.
define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; A negative gep offset is not folded into the immediate; an explicit
; i32.add of -16 is emitted instead.
define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; An add that is only `nsw` (not `nuw`) is not folded; an explicit i32.add
; is emitted before the store.
define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK:         .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; A gep without `inbounds` is not folded either (contrast the folded case).
define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; A store to a constant address folds the address into the offset immediate.
define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK:         .functype store_v8i16_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}

; A store to a global uses the symbol as the offset against a zero base.
define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK:         .functype store_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , <8 x i16>* @gv_v8i16
  ret void
}
1020
1021; ==============================================================================
1022; 4 x i32
1023; ==============================================================================
; Basic v128 load of a <4 x i32> with no offset.
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32:
; CHECK:         .functype load_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %v
}

; A scalar i32 load splatted to all four lanes selects v32x4.load_splat.
define <4 x i32> @load_splat_v4i32(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32:
; CHECK:         .functype load_splat_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i32, i32* %addr, align 4
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; A <4 x i16> load + sext to <4 x i32> combines into i32x4.load16x4_s.
define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32:
; CHECK:         .functype load_sext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Same but with zext, selecting i32x4.load16x4_u.
define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32:
; CHECK:         .functype load_zext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Bare <4 x i16> load (returned at the narrow type) is still lowered via
; the extending load16x4_u.
define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32:
; CHECK:         .functype load_ext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* %p
  ret <4 x i16> %v
}
1082
; `add nuw` offsets are folded into the load's offset immediate for each
; of the load flavors below (plain, splat, sext, zext, ext).
define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_offset:
; CHECK:         .functype load_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; Splat load with a folded nuw offset.
define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
; CHECK:         .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Sign-extending load with a folded nuw offset.
define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
; CHECK:         .functype load_sext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Zero-extending load with a folded nuw offset.
define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
; CHECK:         .functype load_zext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Bare narrow-vector load with a folded nuw offset.
define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
; CHECK:         .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1156
; `inbounds` geps with positive indices are folded into the offset
; immediate; note the immediate scales with the element size (16/4/8).
define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; Splat load: gep over i32 yields a folded offset of 4 bytes.
define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Sign-extending load: gep over <4 x i16> yields a folded offset of 8 bytes.
define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Zero-extending load with the same folded 8-byte gep offset.
define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Bare narrow-vector load with the folded 8-byte gep offset.
define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1220
; Negative gep offsets are never folded into the immediate; an explicit
; i32.add of the (negative, element-size-scaled) constant is emitted.
define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; Splat load with a -4 byte gep offset (explicit add, offset immediate 0).
define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Sign-extending load with a -8 byte gep offset (explicit add).
define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Zero-extending load with a -8 byte gep offset (explicit add).
define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Bare narrow-vector load with a -8 byte gep offset (explicit add).
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1294
; Adds that are only `nsw` (not `nuw`) are not folded into the offset
; immediate; an explicit i32.add is emitted for each load flavor.
define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; CHECK:         .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; Splat load with an unfolded nsw offset.
define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Sign-extending load with an unfolded nsw offset.
define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Zero-extending load with an unfolded nsw offset.
define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Bare narrow-vector load with an unfolded nsw offset.
define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1378
; Geps without `inbounds` are not folded into the offset immediate; an
; explicit i32.add of the byte offset is emitted for each load flavor.
define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; Splat load with an unfolded (non-inbounds) 4-byte gep offset.
define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 1
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Sign-extending load with an unfolded 8-byte gep offset.
define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Zero-extending load with an unfolded 8-byte gep offset.
define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Bare narrow-vector load with an unfolded 8-byte gep offset.
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1452
; Loads from a constant address fold the address into the offset immediate
; (base operand i32.const 0) for each load flavor.
define <4 x i32> @load_v4i32_from_numeric_address() {
; CHECK-LABEL: load_v4i32_from_numeric_address:
; CHECK:         .functype load_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %s
  ret <4 x i32> %v
}

; Splat load from a constant address.
define <4 x i32> @load_splat_v4i32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
; CHECK:         .functype load_splat_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v32x4.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i32*
  %e = load i32, i32* %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Sign-extending load from a constant address.
define <4 x i32> @load_sext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
; CHECK:         .functype load_sext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Zero-extending load from a constant address.
define <4 x i32> @load_zext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
; CHECK:         .functype load_zext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Bare narrow-vector load from a constant address.
define <4 x i16> @load_ext_v4i32_from_numeric_address() {
; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
; CHECK:         .functype load_ext_v4i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i16>*
  %v = load <4 x i16>, <4 x i16>* %s
  ret <4 x i16> %v
}
1516
; Loads from globals: the symbol is used as the offset operand against a
; zero base for each load flavor.
@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
define <4 x i32> @load_v4i32_from_global_address() {
; CHECK-LABEL: load_v4i32_from_global_address:
; CHECK:         .functype load_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i32>, <4 x i32>* @gv_v4i32
  ret <4 x i32> %v
}

; Splat of a scalar i32 global.
@gv_i32 = global i32 42
define <4 x i32> @load_splat_v4i32_from_global_address() {
; CHECK-LABEL: load_splat_v4i32_from_global_address:
; CHECK:         .functype load_splat_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v32x4.load_splat gv_i32
; CHECK-NEXT:    # fallthrough-return
  %e = load i32, i32* @gv_i32
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

; Sign-extending load of a narrow vector global.
@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
define <4 x i32> @load_sext_v4i32_from_global_address() {
; CHECK-LABEL: load_sext_v4i32_from_global_address:
; CHECK:         .functype load_sext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_s gv_v4i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Zero-extending load of the same narrow vector global.
define <4 x i32> @load_zext_v4i32_from_global_address() {
; CHECK-LABEL: load_zext_v4i32_from_global_address:
; CHECK:         .functype load_zext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

; Bare narrow-vector load of the global, still via the extending load.
define <4 x i16> @load_ext_v4i32_from_global_address() {
; CHECK-LABEL: load_ext_v4i32_from_global_address:
; CHECK:         .functype load_ext_v4i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
  ret <4 x i16> %v
}
1578
1579define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
1580; CHECK-LABEL: store_v4i32:
1581; CHECK:         .functype store_v4i32 (v128, i32) -> ()
1582; CHECK-NEXT:  # %bb.0:
1583; CHECK-NEXT:    local.get 1
1584; CHECK-NEXT:    local.get 0
1585; CHECK-NEXT:    v128.store 0
1586; CHECK-NEXT:    # fallthrough-return
1587  store <4 x i32> %v , <4 x i32>* %p
1588  ret void
1589}
1590
1591define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
1592; CHECK-LABEL: store_v4i32_with_folded_offset:
1593; CHECK:         .functype store_v4i32_with_folded_offset (v128, i32) -> ()
1594; CHECK-NEXT:  # %bb.0:
1595; CHECK-NEXT:    local.get 1
1596; CHECK-NEXT:    local.get 0
1597; CHECK-NEXT:    v128.store 16
1598; CHECK-NEXT:    # fallthrough-return
1599  %q = ptrtoint <4 x i32>* %p to i32
1600  %r = add nuw i32 %q, 16
1601  %s = inttoptr i32 %r to <4 x i32>*
1602  store <4 x i32> %v , <4 x i32>* %s
1603  ret void
1604}
1605
1606define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1607; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
1608; CHECK:         .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
1609; CHECK-NEXT:  # %bb.0:
1610; CHECK-NEXT:    local.get 1
1611; CHECK-NEXT:    local.get 0
1612; CHECK-NEXT:    v128.store 16
1613; CHECK-NEXT:    # fallthrough-return
1614  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
1615  store <4 x i32> %v , <4 x i32>* %s
1616  ret void
1617}
1618
1619define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
1620; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
1621; CHECK:         .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
1622; CHECK-NEXT:  # %bb.0:
1623; CHECK-NEXT:    local.get 1
1624; CHECK-NEXT:    i32.const -16
1625; CHECK-NEXT:    i32.add
1626; CHECK-NEXT:    local.get 0
1627; CHECK-NEXT:    v128.store 0
1628; CHECK-NEXT:    # fallthrough-return
1629  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1630  store <4 x i32> %v , <4 x i32>* %s
1631  ret void
1632}
1633
; An nsw (not nuw) add may wrap as an unsigned value, so the +16 must stay an
; explicit i32.add rather than folding into the store's offset immediate.
define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_offset:
; CHECK:         .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1650
; Without inbounds the gep's offset may not be provably non-wrapping, so it is
; not folded into the store's offset immediate.
define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1665
; A constant address lowers to i32.const 0 with the address in the offset immediate.
define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_numeric_address:
; CHECK:         .functype store_v4i32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1678
; A global's address lowers to a symbolic offset immediate on the store.
define void @store_v4i32_to_global_address(<4 x i32> %v) {
; CHECK-LABEL: store_v4i32_to_global_address:
; CHECK:         .functype store_v4i32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4i32
; CHECK-NEXT:    # fallthrough-return
  store <4 x i32> %v , <4 x i32>* @gv_v4i32
  ret void
}
1690
1691; ==============================================================================
1692; 2 x i64
1693; ==============================================================================
; Plain <2 x i64> load lowers to v128.load with offset 0.
define <2 x i64> @load_v2i64(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64:
; CHECK:         .functype load_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i64>, <2 x i64>* %p
  ret <2 x i64> %v
}
1704
; A scalar i64 load splatted via insertelement+shufflevector selects v64x2.load_splat.
define <2 x i64> @load_splat_v2i64(i64* %p) {
; CHECK-LABEL: load_splat_v2i64:
; CHECK:         .functype load_splat_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i64, i64* %p
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
1717
; A <2 x i32> load followed by sext combines into i64x2.load32x2_s.
define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64:
; CHECK:         .functype load_sext_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* %p
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
1729
; A <2 x i32> load followed by zext combines into i64x2.load32x2_u.
define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64:
; CHECK:         .functype load_zext_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* %p
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
1741
; A bare <2 x i32> load (no extend in IR) still uses an extending load, since
; the result is widened to v128; the unused high bits are loaded as zeros.
define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64:
; CHECK:         .functype load_ext_v2i64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* %p
  ret <2 x i32> %v
}
1752
; A nuw add of 16 provably cannot wrap, so it folds into the load's offset immediate.
define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_offset:
; CHECK:         .functype load_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
1766
; The nuw add also folds into the offset immediate of the splat load.
define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
; CHECK:         .functype load_splat_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
1782
; The nuw add folds into the offset immediate of the sign-extending load.
define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
; CHECK:         .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
1797
; The nuw add folds into the offset immediate of the zero-extending load.
define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
; CHECK:         .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
1812
; The nuw add folds into the offset immediate of the widening <2 x i32> load.
define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
; CHECK:         .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
1826
; An inbounds gep of one whole <2 x i64> folds into the load's offset immediate (16).
define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
1838
; An inbounds gep of one i64 (8 bytes) folds into the splat load's offset immediate.
define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
1852
; An inbounds gep of one <2 x i32> (8 bytes) folds into the sign-extending load.
define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
1865
; An inbounds gep of one <2 x i32> (8 bytes) folds into the zero-extending load.
define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
1878
; An inbounds gep of one <2 x i32> (8 bytes) folds into the widening load.
define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
; CHECK:         .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.load32x2_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
1890
; Negative offsets cannot be encoded in the offset immediate; expect an explicit add.
define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
1904
; Negative offset stays an explicit i32.add in front of the splat load.
define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 -1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
1920
; Negative offset stays an explicit i32.add in front of the sign-extending load.
define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
1935
; Negative offset stays an explicit i32.add in front of the zero-extending load.
define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
1950
; Negative offset stays an explicit i32.add in front of the widening load.
define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
1964
; nsw (not nuw) add may wrap unsigned, so the +16 is not folded into the load.
define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_offset:
; CHECK:         .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
1980
; nsw add is not folded into the splat load's offset immediate.
define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
1998
; nsw add is not folded into the sign-extending load's offset immediate.
define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2015
; nsw add is not folded into the zero-extending load's offset immediate.
define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2032
; nsw add is not folded into the widening load's offset immediate.
define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2048
; A gep without inbounds is not folded into the load's offset immediate.
define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
2062
; A gep without inbounds is not folded into the splat load's offset immediate.
define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i64, i64* %p, i32 1
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2078
; A gep without inbounds is not folded into the sign-extending load.
define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2093
; A gep without inbounds is not folded into the zero-extending load.
define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2108
; A gep without inbounds is not folded into the widening load.
define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2122
; A constant address lowers to i32.const 0 with the address in the offset immediate.
define <2 x i64> @load_v2i64_from_numeric_address() {
; CHECK-LABEL: load_v2i64_from_numeric_address:
; CHECK:         .functype load_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %s
  ret <2 x i64> %v
}
2134
; Constant address folds into the splat load's offset immediate.
define <2 x i64> @load_splat_v2i64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
; CHECK:         .functype load_splat_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to i64*
  %e = load i64, i64* %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2148
; Constant address folds into the sign-extending load's offset immediate.
define <2 x i64> @load_sext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
; CHECK:         .functype load_sext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2161
; Constant address folds into the zero-extending load's offset immediate.
define <2 x i64> @load_zext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
; CHECK:         .functype load_zext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2174
; Constant address folds into the widening load's offset immediate.
define <2 x i32> @load_ext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
; CHECK:         .functype load_ext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %s
  ret <2 x i32> %v
}
2186
; A global's address lowers to a symbolic offset immediate on the load.
@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
define <2 x i64> @load_v2i64_from_global_address() {
; CHECK-LABEL: load_v2i64_from_global_address:
; CHECK:         .functype load_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i64>, <2 x i64>* @gv_v2i64
  ret <2 x i64> %v
}
2198
; A global's address lowers to a symbolic offset immediate on the splat load.
@gv_i64 = global i64 42
define <2 x i64> @load_splat_v2i64_from_global_address() {
; CHECK-LABEL: load_splat_v2i64_from_global_address:
; CHECK:         .functype load_splat_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat gv_i64
; CHECK-NEXT:    # fallthrough-return
  %e = load i64, i64* @gv_i64
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2212
; A global's address lowers to a symbolic offset immediate on the extending load.
; @gv_v2i32 is shared by the following zext and widening-load tests as well.
@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
define <2 x i64> @load_sext_v2i64_from_global_address() {
; CHECK-LABEL: load_sext_v2i64_from_global_address:
; CHECK:         .functype load_sext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_s gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2225
; Global address folds into the zero-extending load's symbolic offset immediate.
define <2 x i64> @load_zext_v2i64_from_global_address() {
; CHECK-LABEL: load_zext_v2i64_from_global_address:
; CHECK:         .functype load_zext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2237
; Global address folds into the widening load's symbolic offset immediate.
define <2 x i32> @load_ext_v2i64_from_global_address() {
; CHECK-LABEL: load_ext_v2i64_from_global_address:
; CHECK:         .functype load_ext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
  ret <2 x i32> %v
}
2248
; Plain <2 x i64> store lowers to v128.store with offset 0.
define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64:
; CHECK:         .functype store_v2i64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* %p
  ret void
}
2260
; A nuw add of 16 folds into the store's offset immediate.
define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_offset:
; CHECK:         .functype store_v2i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
2275
; An inbounds gep of one <2 x i64> folds into the store's offset immediate (16).
define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; CHECK:         .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
2288
; Negative offsets cannot be encoded in the offset immediate; expect an explicit add.
define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
2303
; nsw (not nuw) add may wrap unsigned, so the +16 is not folded into the store.
define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; CHECK:         .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
2320
; A gep without inbounds is not folded into the store's offset immediate.
define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
2335
; A constant address lowers to i32.const 0 with the address in the offset immediate.
define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_numeric_address:
; CHECK:         .functype store_v2i64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
2348
; A global's address lowers to a symbolic offset immediate on the store.
define void @store_v2i64_to_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_global_address:
; CHECK:         .functype store_v2i64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , <2 x i64>* @gv_v2i64
  ret void
}
2360
2361; ==============================================================================
2362; 4 x float
2363; ==============================================================================
; Plain <4 x float> load lowers to v128.load with offset 0.
define <4 x float> @load_v4f32(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32:
; CHECK:         .functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* %p
  ret <4 x float> %v
}
2374
; A scalar float load splatted via insertelement+shufflevector selects v32x4.load_splat.
define <4 x float> @load_splat_v4f32(float* %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK:         .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* %p
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2387
; A nuw add of 16 folds into the load's offset immediate.
define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK:         .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}
2401
; The nuw add folds into the splat load's offset immediate.
define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2417
; An inbounds gep of one <4 x float> folds into the load's offset immediate (16).
define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}
2429
; An inbounds gep of one float (4 bytes) folds into the splat load's offset immediate.
define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v32x4.load_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2443
; Negative offsets cannot be encoded in the offset immediate; expect an explicit add.
define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}
2457
; Splat load with a negative gep offset: not foldable (unsigned immediates),
; so an explicit i32.add of -4 is emitted.
define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, float* %p, i32 -1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2473
; 'add nsw' (not nuw) may wrap when treated as unsigned, so the +16 cannot
; be folded into the load's offset immediate.
define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK:         .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}
2489
; Splat-load variant of the 'add nsw' case: the offset stays an explicit
; i32.add rather than folding into load_splat's immediate.
define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint float* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2507
; Without 'inbounds' the gep may wrap, so its offset is not folded into the
; load immediate.
define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}
2521
; Splat-load variant of the non-inbounds gep case: offset stays an explicit
; i32.add.
define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v32x4.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr float, float* %p, i32 1
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2537
; A constant absolute address becomes the load's offset immediate over an
; i32.const 0 base.
define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK:         .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  %v = load <4 x float>, <4 x float>* %s
  ret <4 x float> %v
}
2549
; Splat load from a constant absolute address: the address becomes the
; load_splat offset immediate over an i32.const 0 base.
define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK:         .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v32x4.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to float*
  %e = load float, float* %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2563
; A global's symbol is emitted directly as the load's offset immediate.
@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK:         .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %v
}
2575
; Splat load from a global: the symbol is used as load_splat's offset
; immediate.
@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK:         .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v32x4.load_splat gv_f32
; CHECK-NEXT:    # fallthrough-return
  %e = load float, float* @gv_f32
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2589
; Basic v128 store; the address operand is pushed before the value.
define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32:
; CHECK:         .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* %p
  ret void
}
2601
; 'add nuw' guarantees no unsigned wrap, so the +16 folds into the store's
; offset immediate.
define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK:         .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}
2616
; The inbounds gep's constant offset folds into the store's offset immediate.
define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}
2629
; Negative offsets cannot be folded (unsigned immediates); an explicit
; i32.add is emitted before the store.
define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v , <4 x float>* %s
  ret void
}
2644
; 'add nsw' may wrap unsigned, so the offset is not folded into the store.
define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}
2661
; Without 'inbounds' the gep may wrap, so the offset is not folded into the
; store immediate.
define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v , <4 x float>* %s
  ret void
}
2676
; A constant absolute address becomes the store's offset immediate over an
; i32.const 0 base.
define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}
2689
; A global's symbol is used directly as the store's offset immediate.
define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , <4 x float>* @gv_v4f32
  ret void
}
2701
2702; ==============================================================================
2703; 2 x double
2704; ==============================================================================
; Basic <2 x double> load lowers to a plain v128.load.
define <2 x double> @load_v2f64(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64:
; CHECK:         .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* %p
  ret <2 x double> %v
}
2715
; A scalar load splatted across all lanes selects v64x2.load_splat.
define <2 x double> @load_splat_v2f64(double* %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* %p
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
2728
; 'add nuw' guarantees no unsigned wrap, so the +16 folds into the load's
; offset immediate.
define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}
2742
; Splat load with an 'add nuw' offset: the +16 folds into load_splat's
; offset immediate.
define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
2758
; The inbounds gep's constant offset (16 bytes) folds into the load's
; offset immediate.
define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}
2770
; Splat load: the 8-byte (one double) gep offset folds into load_splat's
; offset immediate.
define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v64x2.load_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
2784
; Negative offsets cannot be folded (unsigned immediates); an explicit
; i32.add is emitted.
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}
2798
; Splat load with a negative gep offset: not foldable, so an explicit
; i32.add of -8 is emitted.
define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, double* %p, i32 -1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
2814
; 'add nsw' (not nuw) may wrap unsigned, so the +16 is not folded into the
; load's offset immediate.
define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}
2830
; Splat-load variant of the 'add nsw' case: the offset stays an explicit
; i32.add.
define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint double* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
2848
; Without 'inbounds' the gep may wrap, so its offset is not folded.
define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}
2862
; Splat-load variant of the non-inbounds gep case: offset stays an explicit
; i32.add.
define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v64x2.load_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr double, double* %p, i32 1
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
2878
; A constant absolute address becomes the load's offset immediate over an
; i32.const 0 base.
define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  %v = load <2 x double>, <2 x double>* %s
  ret <2 x double> %v
}
2890
; Splat load from a constant absolute address: the address becomes the
; load_splat offset immediate over an i32.const 0 base.
define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to double*
  %e = load double, double* %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
2904
; A global's symbol is emitted directly as the load's offset immediate.
@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %v
}
2916
; Splat load from a global: the symbol is used as load_splat's offset
; immediate.
@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v64x2.load_splat gv_f64
; CHECK-NEXT:    # fallthrough-return
  %e = load double, double* @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
2930
; Basic v128 store; the address operand is pushed before the value.
define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64:
; CHECK:         .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* %p
  ret void
}
2942
; 'add nuw' guarantees no unsigned wrap, so the +16 folds into the store's
; offset immediate.
define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}
2957
; The inbounds gep's constant offset folds into the store's offset immediate.
define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}
2970
; Negative offsets cannot be folded (unsigned immediates); an explicit
; i32.add is emitted before the store.
define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v , <2 x double>* %s
  ret void
}
2985
; 'add nsw' may wrap unsigned, so the offset is not folded into the store.
define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}
3002
; Without 'inbounds' the gep may wrap, so the offset is not folded into the
; store immediate.
define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v , <2 x double>* %s
  ret void
}
3017
; A constant absolute address becomes the store's offset immediate over an
; i32.const 0 base.
define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}
3030
; A global's symbol is used directly as the store's offset immediate.
define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , <2 x double>* @gv_v2f64
  ret void
}
3042