1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
2; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
3; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128
4
5; Test SIMD loads and stores
6
7target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
8target triple = "wasm32-unknown-unknown"
9
10; ==============================================================================
11; 16 x i8
12; ==============================================================================
; Baseline: a whole-vector <16 x i8> load straight through %p. With SIMD
; enabled the expected code is one v128.load at offset 0 of argument 0.
; CHECK-LABEL: load_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_v16i8(<16 x i8>* %p) {
  %vec = load <16 x i8>, <16 x i8>* %p
  ret <16 x i8> %vec
}
22
; Scalar i8 load broadcast to all 16 lanes (insertelement into lane 0, then a
; zero-mask shuffle). Expected to become a single v8x16.load_splat; the run
; that only passes -mattr=+simd128 must not produce that instruction.
; CHECK-LABEL: load_splat_v16i8:
; SIMD128-VM-NOT: v8x16.load_splat
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v16i8 (i32) -> (v128){{$}}
; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_splat_v16i8(i8* %p) {
  %elt = load i8, i8* %p
  %ins = insertelement <16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
35
; Address computed as ptrtoint + (add nuw 16) + inttoptr. The expected output
; carries the 16 in the load's offset immediate, with no separate i32.add.
; CHECK-LABEL: load_v16i8_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
  %base = ptrtoint <16 x i8>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %addr
  ret <16 x i8> %vec
}
48
; Splat variant of the nuw-add case: the i8 element is loaded from %p + 16 and
; broadcast; the 16 is expected in the load_splat offset immediate.
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
  %base = ptrtoint i8* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to i8*
  %elt = load i8, i8* %addr
  %ins = insertelement <16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
63
; inbounds GEP by one <16 x i8> element (16 bytes). The expected load uses
; offset immediate 16 directly on argument 0.
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
  %addr = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  %vec = load <16 x i8>, <16 x i8>* %addr
  ret <16 x i8> %vec
}
74
; inbounds GEP by one i8 (1 byte) feeding a splat; the expected load_splat has
; offset immediate 1.
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 1($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
  %addr = getelementptr inbounds i8, i8* %p, i32 1
  %elt = load i8, i8* %addr
  %ins = insertelement <16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
87
; inbounds GEP by -1 vector (-16 bytes). The negative displacement is expected
; to remain an explicit i32.const/i32.add, with the load at offset 0.
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
  %addr = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  %vec = load <16 x i8>, <16 x i8>* %addr
  ret <16 x i8> %vec
}
100
; Splat of the byte at %p - 1 (inbounds GEP by -1). The -1 is expected as an
; explicit add in front of a load_splat with offset 0.
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
  %addr = getelementptr inbounds i8, i8* %p, i32 -1
  %elt = load i8, i8* %addr
  %ins = insertelement <16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
115
; Same address arithmetic as the folded-offset test, but the add is only nsw
; (not nuw). The expected output keeps the 16 as an explicit i32.add and loads
; at offset 0.
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
  %base = ptrtoint <16 x i8>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %addr
  ret <16 x i8> %vec
}
130
; Splat variant of the nsw-add case: the 16 is expected as an explicit i32.add
; feeding a load_splat with offset 0.
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
  %base = ptrtoint i8* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to i8*
  %elt = load i8, i8* %addr
  %ins = insertelement <16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
147
; GEP by one vector WITHOUT inbounds. Unlike the inbounds form, the expected
; output adds 16 explicitly and loads at offset 0.
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
  %addr = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  %vec = load <16 x i8>, <16 x i8>* %addr
  ret <16 x i8> %vec
}
160
; Splat of the byte one past %p, addressed by a GEP without inbounds. The +1
; is expected as an explicit add ahead of a load_splat at offset 0.
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 1{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
  %addr = getelementptr i8, i8* %p, i32 1
  %elt = load i8, i8* %addr
  %ins = insertelement <16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
175
; Load from the constant address 32. Expected form: base register holding 0
; and the 32 carried in the load's offset immediate.
; CHECK-LABEL: load_v16i8_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_v16i8_from_numeric_address() {
  %addr = inttoptr i32 32 to <16 x i8>*
  %vec = load <16 x i8>, <16 x i8>* %addr
  ret <16 x i8> %vec
}
187
; Splat of the byte at constant address 32: expected as load_splat with a zero
; base and offset immediate 32.
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v16i8_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @load_splat_v16i8_from_numeric_address() {
  %addr = inttoptr i32 32 to i8*
  %elt = load i8, i8* %addr
  %ins = insertelement <16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
201
; Load of the global @gv_v16i8 (defined just below). Expected form: zero base
; register with the symbol name used as the load's offset operand.
; CHECK-LABEL: load_v16i8_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v16i8_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v16i8($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
  %vec = load <16 x i8>, <16 x i8>* @gv_v16i8
  ret <16 x i8> %vec
}
213
; Splat of the i8 global @gv_i8 (defined just below): expected as load_splat
; with a zero base and the symbol as the offset operand.
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v16i8_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, gv_i8($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
  %elt = load i8, i8* @gv_i8
  %ins = insertelement <16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
227
; Baseline store: vector argument 0 written through pointer argument 1.
; Expected as one v128.store at offset 0.
; CHECK-LABEL: store_v16i8:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
  store <16 x i8> %v, <16 x i8>* %p
  ret void
}
236
; Store to %p + 16 where the add carries nuw; the 16 is expected in the
; store's offset immediate.
; CHECK-LABEL: store_v16i8_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
  %base = ptrtoint <16 x i8>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <16 x i8>*
  store <16 x i8> %v, <16 x i8>* %addr
  ret void
}
248
; Store through an inbounds GEP by one vector (16 bytes); expected with offset
; immediate 16 on the pointer argument.
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
  %addr = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v, <16 x i8>* %addr
  ret void
}
258
; Store through an inbounds GEP by -1 vector; the -16 is expected as an
; explicit i32.add, with the store at offset 0.
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
  %addr = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  store <16 x i8> %v, <16 x i8>* %addr
  ret void
}
270
; Store to %p + 16 where the add is only nsw (not nuw). This mirrors
; load_v16i8_with_unfolded_offset: the 16 is expected to stay an explicit
; i32.add, with the store at offset 0. (The body was previously a copy of the
; negative-GEP test, so the nsw-add path had no store coverage.)
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
  %q = ptrtoint <16 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <16 x i8>*
  store <16 x i8> %v , <16 x i8>* %s
  ret void
}
282
; Store through a GEP by one vector WITHOUT inbounds; the 16 is expected as an
; explicit i32.add ahead of a store at offset 0.
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
  %addr = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
  store <16 x i8> %v, <16 x i8>* %addr
  ret void
}
294
; Store to the constant address 32: expected with a zero base register and the
; 32 in the store's offset immediate.
; CHECK-LABEL: store_v16i8_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[R]]), $0{{$}}
define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
  %addr = inttoptr i32 32 to <16 x i8>*
  store <16 x i8> %v, <16 x i8>* %addr
  ret void
}
305
; Store to the global @gv_v16i8: expected with a zero base register and the
; symbol as the store's offset operand.
; CHECK-LABEL: store_v16i8_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v16i8_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v16i8($pop[[R]]), $0{{$}}
define void @store_v16i8_to_global_address(<16 x i8> %v) {
  store <16 x i8> %v, <16 x i8>* @gv_v16i8
  ret void
}
315
316; ==============================================================================
317; 8 x i16
318; ==============================================================================
; Baseline <8 x i16> load through %p; expected as one v128.load at offset 0.
; CHECK-LABEL: load_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_v8i16(<8 x i16>* %p) {
  %vec = load <8 x i16>, <8 x i16>* %p
  ret <8 x i16> %vec
}
328
; Scalar i16 load broadcast to all 8 lanes; expected as one v16x8.load_splat.
; CHECK-LABEL: load_splat_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v8i16 (i32) -> (v128){{$}}
; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_splat_v8i16(i16* %p) {
  %elt = load i16, i16* %p
  %ins = insertelement <8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
340
; <8 x i8> load followed by sext to <8 x i16>; expected to combine into one
; sign-extending i16x8.load8x8_s.
; CHECK-LABEL: load_sext_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v8i16 (i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
  %narrow = load <8 x i8>, <8 x i8>* %p
  %wide = sext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
351
; <8 x i8> load followed by zext to <8 x i16>; expected to combine into one
; zero-extending i16x8.load8x8_u.
; CHECK-LABEL: load_zext_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v8i16 (i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
  %narrow = load <8 x i8>, <8 x i8>* %p
  %wide = zext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
362
; Loads and returns a bare <8 x i8> with no explicit extension in the IR. The
; expected signature still returns v128 and the load is expected to be the
; unsigned i16x8.load8x8_u form.
; CHECK-LABEL: load_ext_v8i16:
; NO-SIMD128-NOT: load8x8
; SIMD128-NEXT: .functype load_ext_v8i16 (i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
  %narrow = load <8 x i8>, <8 x i8>* %p
  ret <8 x i8> %narrow
}
372
; <8 x i16> load from %p + 16 with a nuw add; the 16 is expected in the load's
; offset immediate.
; CHECK-LABEL: load_v8i16_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
  %base = ptrtoint <8 x i16>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %addr
  ret <8 x i16> %vec
}
385
; i16 splat loaded from %p + 16 with a nuw add; the 16 is expected in the
; load_splat offset immediate.
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
  %base = ptrtoint i16* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to i16*
  %elt = load i16, i16* %addr
  %ins = insertelement <8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
400
; Sign-extending <8 x i8> load from %p + 16 (nuw add); the 16 is expected in
; the load8x8_s offset immediate.
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v8i16_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
  %base = ptrtoint <8 x i8>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <8 x i8>*
  %narrow = load <8 x i8>, <8 x i8>* %addr
  %wide = sext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
414
; Zero-extending <8 x i8> load from %p + 16 (nuw add); the 16 is expected in
; the load8x8_u offset immediate.
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v8i16_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
  %base = ptrtoint <8 x i8>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <8 x i8>*
  %narrow = load <8 x i8>, <8 x i8>* %addr
  %wide = zext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
428
; Bare <8 x i8> load (no explicit extension) from %p + 16 with a nuw add;
; expected as load8x8_u with offset immediate 16.
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; NO-SIMD128-NOT: load8x8
; SIMD128-NEXT: .functype load_ext_v8i16_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
  %base = ptrtoint <8 x i8>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <8 x i8>*
  %narrow = load <8 x i8>, <8 x i8>* %addr
  ret <8 x i8> %narrow
}
441
; inbounds GEP by one <8 x i16> element (16 bytes); expected as a load with
; offset immediate 16.
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
  %addr = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  %vec = load <8 x i16>, <8 x i16>* %addr
  ret <8 x i16> %vec
}
452
; inbounds GEP by one i16 (2 bytes) feeding a splat; expected as load_splat
; with offset immediate 2.
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 2($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
  %addr = getelementptr inbounds i16, i16* %p, i32 1
  %elt = load i16, i16* %addr
  %ins = insertelement <8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
465
; inbounds GEP by one <8 x i8> element (8 bytes) feeding a sign-extending
; load; expected as load8x8_s with offset immediate 8.
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
  %addr = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %narrow = load <8 x i8>, <8 x i8>* %addr
  %wide = sext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
477
; inbounds GEP by one <8 x i8> element (8 bytes) feeding a zero-extending
; load; expected as load8x8_u with offset immediate 8.
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
  %addr = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %narrow = load <8 x i8>, <8 x i8>* %addr
  %wide = zext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
489
; Bare <8 x i8> load (no explicit extension) through an inbounds GEP by one
; element; expected as load8x8_u with offset immediate 8.
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; NO-SIMD128-NOT: load8x8
; SIMD128-NEXT: .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
  %addr = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
  %narrow = load <8 x i8>, <8 x i8>* %addr
  ret <8 x i8> %narrow
}
500
; inbounds GEP by -1 vector (-16 bytes); the negative displacement is expected
; as an explicit i32.add, with the load at offset 0.
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
  %addr = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  %vec = load <8 x i16>, <8 x i16>* %addr
  ret <8 x i16> %vec
}
513
; Splat of the i16 one element before %p (inbounds GEP by -1, i.e. -2 bytes);
; the -2 is expected as an explicit add ahead of a load_splat at offset 0.
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -2{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
  %addr = getelementptr inbounds i16, i16* %p, i32 -1
  %elt = load i16, i16* %addr
  %ins = insertelement <8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
528
; Sign-extending load through an inbounds GEP by -1 <8 x i8> element (-8
; bytes); the -8 is expected as an explicit add, load8x8_s at offset 0.
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
  %addr = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %narrow = load <8 x i8>, <8 x i8>* %addr
  %wide = sext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
542
; Zero-extending load through an inbounds GEP by -1 <8 x i8> element (-8
; bytes); the -8 is expected as an explicit add, load8x8_u at offset 0.
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
  %addr = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %narrow = load <8 x i8>, <8 x i8>* %addr
  %wide = zext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
556
; Bare <8 x i8> load (no explicit extension) through an inbounds GEP by -1
; element; the -8 is expected as an explicit add, load8x8_u at offset 0.
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: load8x8
; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
  %addr = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
  %narrow = load <8 x i8>, <8 x i8>* %addr
  ret <8 x i8> %narrow
}
569
; <8 x i16> load from %p + 16 where the add is only nsw (not nuw). The 16 is
; expected to stay an explicit i32.add, with the load at offset 0.
; The load result is captured as R (as in the sibling tests) so it does not
; rebind L0, which already names the i32.const result.
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  %v = load <8 x i16>, <8 x i16>* %s
  ret <8 x i16> %v
}
584
; i16 splat loaded from %p + 16 where the add is only nsw (not nuw). The 16 is
; expected as an explicit i32.add ahead of a load_splat at offset 0.
; The load result is captured as R (as in the sibling tests) so it does not
; rebind L0, which already names the i32.const result.
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to i16*
  %e = load i16, i16* %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}
601
; Sign-extending <8 x i8> load from %p + 16 where the add is only nsw. The 16
; is expected as an explicit i32.add ahead of load8x8_s at offset 0.
; The load result is captured as R (as in the sibling tests) so it does not
; rebind L0, which already names the i32.const result.
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
617
; Zero-extending <8 x i8> load from %p + 16 where the add is only nsw. The 16
; is expected as an explicit i32.add ahead of load8x8_u at offset 0.
; The load result is captured as R (as in the sibling tests) so it does not
; rebind L0, which already names the i32.const result.
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}
633
; Bare <8 x i8> load (no explicit extension) from %p + 16 where the add is
; only nsw. The 16 is expected as an explicit i32.add ahead of load8x8_u at
; offset 0.
; The load result is captured as R (as in the sibling tests) so it does not
; rebind L0, which already names the i32.const result.
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; NO-SIMD128-NOT: load8x8
; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
  %q = ptrtoint <8 x i8>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i8>*
  %v = load <8 x i8>, <8 x i8>* %s
  ret <8 x i8> %v
}
648
; GEP by one <8 x i16> element WITHOUT inbounds; the 16 is expected as an
; explicit i32.add, with the load at offset 0.
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
  %addr = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  %vec = load <8 x i16>, <8 x i16>* %addr
  ret <8 x i16> %vec
}
661
; Splat of the i16 one element past %p via a GEP without inbounds; the 2 is
; expected as an explicit add ahead of a load_splat at offset 0.
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 2{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
  %addr = getelementptr i16, i16* %p, i32 1
  %elt = load i16, i16* %addr
  %ins = insertelement <8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
676
; Sign-extending load through a GEP by one <8 x i8> element without inbounds;
; the 8 is expected as an explicit add ahead of load8x8_s at offset 0.
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
  %addr = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %narrow = load <8 x i8>, <8 x i8>* %addr
  %wide = sext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
690
; Zero-extending load through a GEP by one <8 x i8> element without inbounds;
; the 8 is expected as an explicit add ahead of load8x8_u at offset 0.
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
  %addr = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %narrow = load <8 x i8>, <8 x i8>* %addr
  %wide = zext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
704
; Bare <8 x i8> load (no explicit extension) through a GEP by one element
; without inbounds; the 8 is expected as an explicit add ahead of load8x8_u
; at offset 0.
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; NO-SIMD128-NOT: load8x8
; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
  %addr = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
  %narrow = load <8 x i8>, <8 x i8>* %addr
  ret <8 x i8> %narrow
}
717
; <8 x i16> load from the constant address 32: expected with a zero base
; register and the 32 in the load's offset immediate.
; CHECK-LABEL: load_v8i16_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_v8i16_from_numeric_address() {
  %addr = inttoptr i32 32 to <8 x i16>*
  %vec = load <8 x i16>, <8 x i16>* %addr
  ret <8 x i16> %vec
}
729
; Scalar i16 load from the constant address 32, splatted to all 8 lanes: the
; checks expect v16x8.load_splat with a zero base and offset immediate 32.
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v8i16_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_splat_v8i16_from_numeric_address() {
  %addr = inttoptr i32 32 to i16*
  %elt = load i16, i16* %addr
  %lane0 = insertelement <8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <8 x i16> %lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
743
; Sign-extending <8 x i8> load from the constant address 32: the checks expect
; a zero base with 32 as the load's offset immediate.
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v8i16_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_sext_v8i16_from_numeric_address() {
  %addr = inttoptr i32 32 to <8 x i8>*
  %narrow = load <8 x i8>, <8 x i8>* %addr
  %wide = sext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
756
; Zero-extending <8 x i8> load from the constant address 32: the checks expect
; a zero base with 32 as the load's offset immediate.
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v8i16_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_zext_v8i16_from_numeric_address() {
  %addr = inttoptr i32 32 to <8 x i8>*
  %narrow = load <8 x i8>, <8 x i8>* %addr
  %wide = zext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
769
; <8 x i8> load from the constant address 32, returned without an explicit
; extension; the checks expect i16x8.load8x8_u with offset immediate 32.
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; NO-SIMD128-NOT: load8x8
; SIMD128-NEXT: .functype load_ext_v8i16_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i8> @load_ext_v8i16_from_numeric_address() {
  %addr = inttoptr i32 32 to <8 x i8>*
  %narrow = load <8 x i8>, <8 x i8>* %addr
  ret <8 x i8> %narrow
}
781
; Full-vector load from a global: the checks expect a zero base with the
; symbol gv_v8i16 as the load's offset.
; CHECK-LABEL: load_v8i16_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v8i16_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v8i16($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
  %vec = load <8 x i16>, <8 x i16>* @gv_v8i16
  ret <8 x i16> %vec
}
793
; Scalar i16 load from a global, splatted to all 8 lanes: the checks expect
; v16x8.load_splat with a zero base and the symbol gv_i16 as the offset.
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v8i16_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, gv_i16($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
  %elt = load i16, i16* @gv_i16
  %lane0 = insertelement <8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <8 x i16> %lane0, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
807
; Sign-extending <8 x i8> load from a global: the checks expect a zero base
; with the symbol gv_v8i8 as the load's offset.
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v8i16_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
  %narrow = load <8 x i8>, <8 x i8>* @gv_v8i8
  %wide = sext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
820
; Zero-extending <8 x i8> load from a global: the checks expect a zero base
; with the symbol gv_v8i8 as the load's offset.
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v8i16_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @load_zext_v8i16_from_global_address() {
  %narrow = load <8 x i8>, <8 x i8>* @gv_v8i8
  %wide = zext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
832
; <8 x i8> load from a global, returned without an explicit extension; the
; checks expect i16x8.load8x8_u with the symbol gv_v8i8 as the offset.
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; NO-SIMD128-NOT: load8x8
; SIMD128-NEXT: .functype load_ext_v8i16_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i8> @load_ext_v8i16_from_global_address() {
  %narrow = load <8 x i8>, <8 x i8>* @gv_v8i8
  ret <8 x i8> %narrow
}
843
844
; Plain <8 x i16> store through the pointer argument: a single v128.store with
; offset 0 is expected.
; CHECK-LABEL: store_v8i16:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
  store <8 x i16> %v, <8 x i16>* %p
  ret void
}
853
; Store to an address formed by `add nuw` of 16 on the pointer's integer value:
; the checks expect 16 as the store's offset immediate and no separate add.
; CHECK-LABEL: store_v8i16_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
  %base = ptrtoint <8 x i16>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <8 x i16>*
  store <8 x i16> %v, <8 x i16>* %addr
  ret void
}
865
; Store through an inbounds GEP of +1 vector: the checks expect the 16-byte
; offset as the store's offset immediate.
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
  %next = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v, <8 x i16>* %next
  ret void
}
875
; Store through an inbounds GEP of -1 vector: the checks expect the -16 offset
; as an explicit i32.add, with 0 as the store's offset immediate.
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
  %prev = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
  store <8 x i16> %v, <8 x i16>* %prev
  ret void
}
887
; Store to an address formed by `add nsw` (no nuw) of 16 on the pointer's
; integer value: the checks expect the constant 16 to stay in an explicit
; i32.add, with 0 as the store's offset immediate. This mirrors the
; load_*_with_unfolded_offset tests; the GEP-based negative-offset case is
; covered separately by store_v8i16_with_unfolded_gep_negative_offset.
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
  %q = ptrtoint <8 x i16>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <8 x i16>*
  store <8 x i16> %v , <8 x i16>* %s
  ret void
}
899
; Store through a plain (non-inbounds) GEP of +1 vector: the checks expect the
; 16-byte offset as an explicit i32.add, with 0 as the store's offset immediate.
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
  %next = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
  store <8 x i16> %v, <8 x i16>* %next
  ret void
}
911
; Store to the constant address 32: the checks expect a zero base with 32 as
; the store's offset immediate.
; CHECK-LABEL: store_v8i16_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
  %addr = inttoptr i32 32 to <8 x i16>*
  store <8 x i16> %v, <8 x i16>* %addr
  ret void
}
922
; Store to the global gv_v8i16: the checks expect a zero base with the symbol
; as the store's offset.
; CHECK-LABEL: store_v8i16_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v8i16_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v8i16($pop[[R]]), $0{{$}}
define void @store_v8i16_to_global_address(<8 x i16> %v) {
  store <8 x i16> %v, <8 x i16>* @gv_v8i16
  ret void
}
932
933; ==============================================================================
934; 4 x i32
935; ==============================================================================
; Plain <4 x i32> load through the pointer argument: a single v128.load with
; offset 0 is expected.
; CHECK-LABEL: load_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_v4i32(<4 x i32>* %p) {
  %vec = load <4 x i32>, <4 x i32>* %p
  ret <4 x i32> %vec
}
945
; Scalar i32 load through the pointer argument, splatted to all 4 lanes: a
; single v32x4.load_splat with offset 0 is expected. The operands and the
; trailing return are checked in full, like the other load_splat tests, so a
; wrong base or offset cannot slip through.
; CHECK-LABEL: load_splat_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4i32 (i32) -> (v128){{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_splat_v4i32(i32* %addr) {
  %e = load i32, i32* %addr, align 4
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}
956
; Sign-extending <4 x i16> load through the pointer argument: a single
; i32x4.load16x4_s with offset 0 is expected.
; CHECK-LABEL: load_sext_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v4i32 (i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
  %narrow = load <4 x i16>, <4 x i16>* %p
  %wide = sext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
967
; Zero-extending <4 x i16> load through the pointer argument: a single
; i32x4.load16x4_u with offset 0 is expected.
; CHECK-LABEL: load_zext_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v4i32 (i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
  %narrow = load <4 x i16>, <4 x i16>* %p
  %wide = zext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
978
; <4 x i16> load returned without an explicit extension; the checks expect
; i32x4.load16x4_u with offset 0.
; CHECK-LABEL: load_ext_v4i32:
; NO-SIMD128-NOT: load16x4
; SIMD128-NEXT: .functype load_ext_v4i32 (i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
  %narrow = load <4 x i16>, <4 x i16>* %p
  ret <4 x i16> %narrow
}
988
; Load from an address formed by `add nuw` of 16 on the pointer's integer
; value: the checks expect 16 as the load's offset immediate and no separate add.
; CHECK-LABEL: load_v4i32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
  %base = ptrtoint <4 x i32>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %addr
  ret <4 x i32> %vec
}
1001
; Splat of an i32 loaded from an `add nuw` +16 address: the checks expect 16
; as the v32x4.load_splat offset immediate and no separate add.
; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
  %base = ptrtoint i32* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to i32*
  %elt = load i32, i32* %addr
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
1016
; Sign-extending <4 x i16> load from an `add nuw` +16 address: the checks
; expect 16 as the load's offset immediate and no separate add.
; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
  %base = ptrtoint <4 x i16>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x i16>*
  %narrow = load <4 x i16>, <4 x i16>* %addr
  %wide = sext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1030
; Zero-extending <4 x i16> load from an `add nuw` +16 address: the checks
; expect 16 as the load's offset immediate and no separate add.
; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
  %base = ptrtoint <4 x i16>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x i16>*
  %narrow = load <4 x i16>, <4 x i16>* %addr
  %wide = zext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1044
; <4 x i16> load from an `add nuw` +16 address, returned without an explicit
; extension; the checks expect i32x4.load16x4_u with offset immediate 16.
; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
; NO-SIMD128-NOT: load16x4
; SIMD128-NEXT: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
  %base = ptrtoint <4 x i16>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x i16>*
  %narrow = load <4 x i16>, <4 x i16>* %addr
  ret <4 x i16> %narrow
}
1057
; Load through an inbounds GEP of +1 vector: the checks expect the 16-byte
; offset as the load's offset immediate.
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
  %next = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  %vec = load <4 x i32>, <4 x i32>* %next
  ret <4 x i32> %vec
}
1068
; Splat of an i32 loaded through an inbounds GEP of +1 element: the checks
; expect the 4-byte offset as the v32x4.load_splat offset immediate.
; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 4($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
  %next = getelementptr inbounds i32, i32* %p, i32 1
  %elt = load i32, i32* %next
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
1081
; Sign-extending <4 x i16> load through an inbounds GEP of +1 vector: the
; checks expect the 8-byte offset as the load's offset immediate.
; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
  %next = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %narrow = load <4 x i16>, <4 x i16>* %next
  %wide = sext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1093
; Zero-extending <4 x i16> load through an inbounds GEP of +1 vector: the
; checks expect the 8-byte offset as the load's offset immediate.
; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
  %next = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %narrow = load <4 x i16>, <4 x i16>* %next
  %wide = zext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1105
; <4 x i16> load through an inbounds GEP of +1 vector, returned without an
; explicit extension; the checks expect i32x4.load16x4_u with offset immediate 8.
; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
; NO-SIMD128-NOT: load16x4
; SIMD128-NEXT: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
  %next = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
  %narrow = load <4 x i16>, <4 x i16>* %next
  ret <4 x i16> %narrow
}
1116
; Load through an inbounds GEP of -1 vector: the checks expect the -16 offset
; as an explicit i32.add, with 0 as the load's offset immediate.
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
  %prev = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  %vec = load <4 x i32>, <4 x i32>* %prev
  ret <4 x i32> %vec
}
1129
; Splat of an i32 loaded through an inbounds GEP of -1 element: the checks
; expect the -4 offset as an explicit i32.add, with 0 as the offset immediate.
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -4{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
  %prev = getelementptr inbounds i32, i32* %p, i32 -1
  %elt = load i32, i32* %prev
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
1144
; Sign-extending <4 x i16> load through an inbounds GEP of -1 vector: the
; checks expect the -8 offset as an explicit i32.add, with 0 as the immediate.
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
  %prev = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %narrow = load <4 x i16>, <4 x i16>* %prev
  %wide = sext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1158
; Zero-extending <4 x i16> load through an inbounds GEP of -1 vector: the
; checks expect the -8 offset as an explicit i32.add, with 0 as the immediate.
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
  %prev = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %narrow = load <4 x i16>, <4 x i16>* %prev
  %wide = zext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1172
; <4 x i16> load through an inbounds GEP of -1 vector, returned without an
; explicit extension; the checks expect an i32.add of -8 then i32x4.load16x4_u.
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: load16x4
; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
  %prev = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
  %narrow = load <4 x i16>, <4 x i16>* %prev
  ret <4 x i16> %narrow
}
1185
; Load from an address formed by `add nsw` (no nuw) of 16: the checks expect
; the constant in an explicit i32.add, with 0 as the load's offset immediate.
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
  %base = ptrtoint <4 x i32>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %addr
  ret <4 x i32> %vec
}
1200
; Splat of an i32 loaded from an `add nsw` (no nuw) +16 address: the checks
; expect the constant in an explicit i32.add, with 0 as the offset immediate.
; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
  %base = ptrtoint i32* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to i32*
  %elt = load i32, i32* %addr
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
1217
; Sign-extending <4 x i16> load from an `add nsw` (no nuw) +16 address: the
; checks expect the constant in an explicit i32.add, with 0 as the immediate.
; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
  %base = ptrtoint <4 x i16>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x i16>*
  %narrow = load <4 x i16>, <4 x i16>* %addr
  %wide = sext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1233
; Zero-extending <4 x i16> load from an `add nsw` (no nuw) +16 address: the
; checks expect the constant in an explicit i32.add, with 0 as the immediate.
; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
  %base = ptrtoint <4 x i16>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x i16>*
  %narrow = load <4 x i16>, <4 x i16>* %addr
  %wide = zext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1249
; <4 x i16> load from an `add nsw` (no nuw) +16 address, returned without an
; explicit extension; the checks expect an i32.add of 16 then i32x4.load16x4_u.
; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
; NO-SIMD128-NOT: load16x4
; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
  %base = ptrtoint <4 x i16>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x i16>*
  %narrow = load <4 x i16>, <4 x i16>* %addr
  ret <4 x i16> %narrow
}
1264
; Load through a plain (non-inbounds) GEP of +1 vector: the checks expect the
; 16-byte offset as an explicit i32.add, with 0 as the load's offset immediate.
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
  %next = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  %vec = load <4 x i32>, <4 x i32>* %next
  ret <4 x i32> %vec
}
1277
; Splat of an i32 loaded through a plain (non-inbounds) GEP of +1 element: the
; checks expect the 4-byte offset as an explicit i32.add, with 0 as the immediate.
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 4{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
  %next = getelementptr i32, i32* %p, i32 1
  %elt = load i32, i32* %next
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
1292
; Sign-extending <4 x i16> load through a plain (non-inbounds) GEP of +1
; vector: the checks expect an explicit i32.add of 8 and offset immediate 0.
; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
  %next = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %narrow = load <4 x i16>, <4 x i16>* %next
  %wide = sext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1306
; Zero-extending <4 x i16> load through a plain (non-inbounds) GEP of +1
; vector: the checks expect an explicit i32.add of 8 and offset immediate 0.
; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
  %next = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %narrow = load <4 x i16>, <4 x i16>* %next
  %wide = zext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1320
; <4 x i16> load through a plain (non-inbounds) GEP of +1 vector, returned
; without an explicit extension; an i32.add of 8 then i32x4.load16x4_u is expected.
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: load16x4
; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
  %next = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
  %narrow = load <4 x i16>, <4 x i16>* %next
  ret <4 x i16> %narrow
}
1333
; Full-vector load from the constant address 32: the checks expect a zero base
; with 32 as the load's offset immediate.
; CHECK-LABEL: load_v4i32_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_v4i32_from_numeric_address() {
  %addr = inttoptr i32 32 to <4 x i32>*
  %vec = load <4 x i32>, <4 x i32>* %addr
  ret <4 x i32> %vec
}
1345
; Scalar i32 load from the constant address 32, splatted to all 4 lanes: the
; checks expect v32x4.load_splat with a zero base and offset immediate 32.
; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4i32_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_splat_v4i32_from_numeric_address() {
  %addr = inttoptr i32 32 to i32*
  %elt = load i32, i32* %addr
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
1359
; Sign-extending <4 x i16> load from the constant address 32: the checks expect
; a zero base with 32 as the load's offset immediate.
; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v4i32_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_sext_v4i32_from_numeric_address() {
  %addr = inttoptr i32 32 to <4 x i16>*
  %narrow = load <4 x i16>, <4 x i16>* %addr
  %wide = sext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1372
; Zero-extending <4 x i16> load from the constant address 32: the checks expect
; a zero base with 32 as the load's offset immediate.
; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v4i32_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_zext_v4i32_from_numeric_address() {
  %addr = inttoptr i32 32 to <4 x i16>*
  %narrow = load <4 x i16>, <4 x i16>* %addr
  %wide = zext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1385
; <4 x i16> load from the constant address 32, returned without an explicit
; extension; the checks expect i32x4.load16x4_u with offset immediate 32.
; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
; NO-SIMD128-NOT: load16x4
; SIMD128-NEXT: .functype load_ext_v4i32_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i16> @load_ext_v4i32_from_numeric_address() {
  %addr = inttoptr i32 32 to <4 x i16>*
  %narrow = load <4 x i16>, <4 x i16>* %addr
  ret <4 x i16> %narrow
}
1397
; Full-vector load from a global: the checks expect a zero base with the
; symbol gv_v4i32 as the load's offset.
; CHECK-LABEL: load_v4i32_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4i32_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4i32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
define <4 x i32> @load_v4i32_from_global_address() {
  %vec = load <4 x i32>, <4 x i32>* @gv_v4i32
  ret <4 x i32> %vec
}
1409
; Scalar i32 load from a global, splatted to all 4 lanes: the checks expect
; v32x4.load_splat with a zero base and the symbol gv_i32 as the offset.
; CHECK-LABEL: load_splat_v4i32_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4i32_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, gv_i32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_i32 = global i32 42
define <4 x i32> @load_splat_v4i32_from_global_address() {
  %elt = load i32, i32* @gv_i32
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
1423
; Sign-extending <4 x i16> load from a global: the checks expect a zero base
; with the symbol gv_v4i16 as the load's offset.
; CHECK-LABEL: load_sext_v4i32_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v4i32_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
define <4 x i32> @load_sext_v4i32_from_global_address() {
  %narrow = load <4 x i16>, <4 x i16>* @gv_v4i16
  %wide = sext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1436
; Zero-extending <4 x i16> load from a global: the checks expect a zero base
; with the symbol gv_v4i16 as the load's offset.
; CHECK-LABEL: load_zext_v4i32_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v4i32_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @load_zext_v4i32_from_global_address() {
  %narrow = load <4 x i16>, <4 x i16>* @gv_v4i16
  %wide = zext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
1448
; <4 x i16> global load returned with no explicit extension in the IR: the
; expected lowering is the zero-extending load.
; CHECK-LABEL: load_ext_v4i32_from_global_address:
; NO-SIMD128-NOT: load16x4
; SIMD128-NEXT: .functype load_ext_v4i32_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i16> @load_ext_v4i32_from_global_address() {
  %narrow = load <4 x i16>, <4 x i16>* @gv_v4i16
  ret <4 x i16> %narrow
}
1459
; Plain <4 x i32> store through the pointer argument, zero offset expected.
; CHECK-LABEL: store_v4i32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
  store <4 x i32> %v, <4 x i32>* %p
  ret void
}
1468
; Pointer advanced by an `add nuw` of 16: the constant is expected to appear as
; the store's offset immediate.
; CHECK-LABEL: store_v4i32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
  %base = ptrtoint <4 x i32>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <4 x i32>*
  store <4 x i32> %v, <4 x i32>* %addr
  ret void
}
1480
; Inbounds gep by one vector (16 bytes): expected as the store's offset
; immediate.
; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
  %addr = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v, <4 x i32>* %addr
  ret void
}
1490
; Inbounds gep by minus one vector (-16 bytes): an explicit i32.add is expected
; and the store keeps a zero offset.
; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
  %addr = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
  store <4 x i32> %v, <4 x i32>* %addr
  ret void
}
1502
; Pointer advanced by an `add nsw` (no nuw) of 16: the constant is expected to
; stay in an explicit i32.add while the store keeps a zero offset. This is the
; store counterpart of load_v2i64_with_unfolded_offset and is deliberately
; distinct from the negative-gep test above.
; CHECK-LABEL: store_v4i32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
  %q = ptrtoint <4 x i32>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x i32>*
  store <4 x i32> %v , <4 x i32>* %s
  ret void
}
1514
; Gep by one vector without `inbounds`: an explicit i32.add of 16 is expected
; and the store keeps a zero offset.
; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
  %addr = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
  store <4 x i32> %v, <4 x i32>* %addr
  ret void
}
1526
; Store to the constant address 32: expected as offset 32 on a constant-zero
; base.
; CHECK-LABEL: store_v4i32_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
  %addr = inttoptr i32 32 to <4 x i32>*
  store <4 x i32> %v, <4 x i32>* %addr
  ret void
}
1537
; Store to the <4 x i32> global: the symbol is expected as the store's offset
; operand on a constant-zero base.
; CHECK-LABEL: store_v4i32_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4i32_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v4i32($pop[[R]]), $0{{$}}
define void @store_v4i32_to_global_address(<4 x i32> %v) {
  store <4 x i32> %v, <4 x i32>* @gv_v4i32
  ret void
}
1547
1548; ==============================================================================
1549; 2 x i64
1550; ==============================================================================
; Plain <2 x i64> load through the pointer argument, zero offset expected.
; CHECK-LABEL: load_v2i64:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2i64 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_v2i64(<2 x i64>* %p) {
  %vec = load <2 x i64>, <2 x i64>* %p
  ret <2 x i64> %vec
}
1560
; Scalar i64 load broadcast to both lanes: expected to become one load_splat.
; CHECK-LABEL: load_splat_v2i64:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2i64 (i32) -> (v128){{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_splat_v2i64(i64* %p) {
  %elt = load i64, i64* %p
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
1572
; <2 x i32> load sign-extended to <2 x i64>: expected to become one
; sign-extending load.
; CHECK-LABEL: load_sext_v2i64:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v2i64 (i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
  %narrow = load <2 x i32>, <2 x i32>* %p
  %wide = sext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1583
; <2 x i32> load zero-extended to <2 x i64>: expected to become one
; zero-extending load.
; CHECK-LABEL: load_zext_v2i64:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v2i64 (i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
  %narrow = load <2 x i32>, <2 x i32>* %p
  %wide = zext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1594
; <2 x i32> load returned with no explicit extension in the IR: the
; unimplemented-simd128 run expects the zero-extending load, while the plain
; simd128 run expects no load32x2 at all.
; CHECK-LABEL: load_ext_v2i64:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: load32x2
; SIMD128-NEXT: .functype load_ext_v2i64 (i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
  %narrow = load <2 x i32>, <2 x i32>* %p
  ret <2 x i32> %narrow
}
1605
; Pointer advanced by an `add nuw` of 16: the constant is expected to appear as
; the load's offset immediate.
; CHECK-LABEL: load_v2i64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
  %base = ptrtoint <2 x i64>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x i64>*
  %vec = load <2 x i64>, <2 x i64>* %addr
  ret <2 x i64> %vec
}
1618
; Splat of an i64 loaded from the pointer plus an `add nuw` of 16: the constant
; is expected as the load_splat offset immediate.
; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
  %base = ptrtoint i64* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to i64*
  %elt = load i64, i64* %addr
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
1633
; Sign-extending <2 x i32> load from the pointer plus an `add nuw` of 16: the
; constant is expected as the load's offset immediate.
; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
  %base = ptrtoint <2 x i32>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x i32>*
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = sext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1647
; Zero-extending <2 x i32> load from the pointer plus an `add nuw` of 16: the
; constant is expected as the load's offset immediate.
; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
  %base = ptrtoint <2 x i32>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x i32>*
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = zext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1661
; Unextended <2 x i32> load from the pointer plus an `add nuw` of 16: the
; unimplemented-simd128 run expects a zero-extending load with offset 16, the
; plain simd128 run expects no load32x2.
; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: load32x2
; SIMD128-NEXT: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
  %base = ptrtoint <2 x i32>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x i32>*
  %narrow = load <2 x i32>, <2 x i32>* %addr
  ret <2 x i32> %narrow
}
1675
; Inbounds gep by one vector (16 bytes): expected as the load's offset
; immediate.
; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
  %addr = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  %vec = load <2 x i64>, <2 x i64>* %addr
  ret <2 x i64> %vec
}
1686
; Splat of an i64 loaded via an inbounds gep by one element (8 bytes): expected
; as the load_splat offset immediate.
; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
  %addr = getelementptr inbounds i64, i64* %p, i32 1
  %elt = load i64, i64* %addr
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
1699
; Sign-extending load via an inbounds gep by one <2 x i32> (8 bytes): expected
; as the load's offset immediate.
; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
  %addr = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = sext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1711
; Zero-extending load via an inbounds gep by one <2 x i32> (8 bytes): expected
; as the load's offset immediate.
; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
  %addr = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = zext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1723
; Unextended <2 x i32> load via an inbounds gep by one element (8 bytes): the
; unimplemented-simd128 run expects a zero-extending load with offset 8, the
; plain simd128 run expects no load32x2.
; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: load32x2
; SIMD128-NEXT: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
  %addr = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
  %narrow = load <2 x i32>, <2 x i32>* %addr
  ret <2 x i32> %narrow
}
1735
; Inbounds gep by minus one vector (-16 bytes): an explicit i32.add is expected
; and the load keeps a zero offset.
; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
  %addr = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  %vec = load <2 x i64>, <2 x i64>* %addr
  ret <2 x i64> %vec
}
1748
; Splat of an i64 loaded via an inbounds gep by minus one element (-8 bytes):
; an explicit i32.add is expected and the load_splat keeps a zero offset.
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
  %addr = getelementptr inbounds i64, i64* %p, i32 -1
  %elt = load i64, i64* %addr
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
1763
; Sign-extending load via an inbounds gep by minus one <2 x i32> (-8 bytes): an
; explicit i32.add is expected and the load keeps a zero offset.
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
  %addr = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = sext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1777
; Zero-extending load via an inbounds gep by minus one <2 x i32> (-8 bytes): an
; explicit i32.add is expected and the load keeps a zero offset.
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
  %addr = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = zext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1791
; Unextended <2 x i32> load via an inbounds gep by minus one element (-8
; bytes): the unimplemented-simd128 run expects an explicit i32.add feeding a
; zero-extending load, the plain simd128 run expects no load32x2.
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: load32x2
; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
  %addr = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
  %narrow = load <2 x i32>, <2 x i32>* %addr
  ret <2 x i32> %narrow
}
1805
; Pointer advanced by an `add nsw` (no nuw) of 16: an explicit i32.add is
; expected and the load keeps a zero offset.
; CHECK-LABEL: load_v2i64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
  %base = ptrtoint <2 x i64>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x i64>*
  %vec = load <2 x i64>, <2 x i64>* %addr
  ret <2 x i64> %vec
}
1820
; Splat of an i64 loaded from the pointer plus an `add nsw` of 16: an explicit
; i32.add is expected and the load_splat keeps a zero offset.
; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
  %base = ptrtoint i64* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to i64*
  %elt = load i64, i64* %addr
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
1837
; Sign-extending load from the pointer plus an `add nsw` of 16: an explicit
; i32.add is expected and the load keeps a zero offset.
; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
  %base = ptrtoint <2 x i32>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x i32>*
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = sext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1853
; Zero-extending load from the pointer plus an `add nsw` of 16: an explicit
; i32.add is expected and the load keeps a zero offset.
; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
  %base = ptrtoint <2 x i32>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x i32>*
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = zext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1869
; Unextended <2 x i32> load from the pointer plus an `add nsw` of 16: the
; unimplemented-simd128 run expects an explicit i32.add feeding a zero-extending
; load, the plain simd128 run expects no load32x2.
; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: load32x2
; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
  %base = ptrtoint <2 x i32>* %p to i32
  %sum = add nsw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x i32>*
  %narrow = load <2 x i32>, <2 x i32>* %addr
  ret <2 x i32> %narrow
}
1885
; Gep by one vector without `inbounds`: an explicit i32.add of 16 is expected
; and the load keeps a zero offset.
; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
  %addr = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  %vec = load <2 x i64>, <2 x i64>* %addr
  ret <2 x i64> %vec
}
1898
; Splat of an i64 loaded via a gep by one element without `inbounds`: an
; explicit i32.add of 8 is expected and the load_splat keeps a zero offset.
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
  %addr = getelementptr i64, i64* %p, i32 1
  %elt = load i64, i64* %addr
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
1913
; Sign-extending load via a gep by one <2 x i32> without `inbounds`: an explicit
; i32.add of 8 is expected and the load keeps a zero offset.
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
  %addr = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = sext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1927
; Zero-extending load via a gep by one <2 x i32> without `inbounds`: an explicit
; i32.add of 8 is expected and the load keeps a zero offset.
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
  %addr = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = zext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1941
; Unextended <2 x i32> load via a gep by one element without `inbounds`: the
; unimplemented-simd128 run expects an explicit i32.add of 8 feeding a
; zero-extending load, the plain simd128 run expects no load32x2.
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: load32x2
; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
  %addr = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
  %narrow = load <2 x i32>, <2 x i32>* %addr
  ret <2 x i32> %narrow
}
1955
; Load from the constant address 32: expected as offset 32 on a constant-zero
; base.
; CHECK-LABEL: load_v2i64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_v2i64_from_numeric_address() {
  %addr = inttoptr i32 32 to <2 x i64>*
  %vec = load <2 x i64>, <2 x i64>* %addr
  ret <2 x i64> %vec
}
1967
; Splat of an i64 loaded from the constant address 32: expected as a load_splat
; with offset 32 on a constant-zero base.
; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2i64_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_splat_v2i64_from_numeric_address() {
  %addr = inttoptr i32 32 to i64*
  %elt = load i64, i64* %addr
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
1981
; Sign-extending <2 x i32> load from the constant address 32: expected with
; offset 32 on a constant-zero base.
; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v2i64_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_sext_v2i64_from_numeric_address() {
  %addr = inttoptr i32 32 to <2 x i32>*
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = sext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
1994
; Zero-extending <2 x i32> load from the constant address 32: expected with
; offset 32 on a constant-zero base.
; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v2i64_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_zext_v2i64_from_numeric_address() {
  %addr = inttoptr i32 32 to <2 x i32>*
  %narrow = load <2 x i32>, <2 x i32>* %addr
  %wide = zext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
2007
; Unextended <2 x i32> load from the constant address 32: the
; unimplemented-simd128 run expects a zero-extending load with offset 32, the
; plain simd128 run expects no load32x2.
; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: load32x2
; SIMD128-NEXT: .functype load_ext_v2i64_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i32> @load_ext_v2i64_from_numeric_address() {
  %addr = inttoptr i32 32 to <2 x i32>*
  %narrow = load <2 x i32>, <2 x i32>* %addr
  ret <2 x i32> %narrow
}
2020
; Full-width load of the <2 x i64> global: the symbol is expected as the load's
; offset operand on a constant-zero base.
; CHECK-LABEL: load_v2i64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2i64_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2i64($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
define <2 x i64> @load_v2i64_from_global_address() {
  %vec = load <2 x i64>, <2 x i64>* @gv_v2i64
  ret <2 x i64> %vec
}
2032
; Scalar i64 load from a global broadcast to both lanes: expected to become one
; load_splat addressed by the global symbol.
; CHECK-LABEL: load_splat_v2i64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2i64_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, gv_i64($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_i64 = global i64 42
define <2 x i64> @load_splat_v2i64_from_global_address() {
  %elt = load i64, i64* @gv_i64
  %ins = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
2046
; <2 x i32> global load followed by sign extension to <2 x i64>: expected to
; become one sign-extending load addressed by the global symbol.
; CHECK-LABEL: load_sext_v2i64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_sext_v2i64_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
define <2 x i64> @load_sext_v2i64_from_global_address() {
  %narrow = load <2 x i32>, <2 x i32>* @gv_v2i32
  %wide = sext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
2059
; <2 x i32> global load followed by zero extension to <2 x i64>: expected to
; become one zero-extending load addressed by the global symbol.
; CHECK-LABEL: load_zext_v2i64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_zext_v2i64_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @load_zext_v2i64_from_global_address() {
  %narrow = load <2 x i32>, <2 x i32>* @gv_v2i32
  %wide = zext <2 x i32> %narrow to <2 x i64>
  ret <2 x i64> %wide
}
2071
; Unextended <2 x i32> global load: the unimplemented-simd128 run expects a
; zero-extending load addressed by the global symbol, the plain simd128 run
; expects no load32x2.
; CHECK-LABEL: load_ext_v2i64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-VM-NOT: load32x2
; SIMD128-NEXT: .functype load_ext_v2i64_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i32> @load_ext_v2i64_from_global_address() {
  %narrow = load <2 x i32>, <2 x i32>* @gv_v2i32
  ret <2 x i32> %narrow
}
2083
; Plain <2 x i64> store through the pointer argument, zero offset expected.
; CHECK-LABEL: store_v2i64:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2i64 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
  store <2 x i64> %v, <2 x i64>* %p
  ret void
}
2092
; Pointer advanced by an `add nuw` of 16: the constant is expected to appear as
; the store's offset immediate.
; CHECK-LABEL: store_v2i64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
  %base = ptrtoint <2 x i64>* %p to i32
  %sum = add nuw i32 %base, 16
  %addr = inttoptr i32 %sum to <2 x i64>*
  store <2 x i64> %v, <2 x i64>* %addr
  ret void
}
2104
; Inbounds gep by one vector (16 bytes): expected as the store's offset
; immediate.
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
  %addr = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v, <2 x i64>* %addr
  ret void
}
2114
; Inbounds gep by minus one vector (-16 bytes): an explicit i32.add is expected
; and the store keeps a zero offset.
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
  %addr = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
  store <2 x i64> %v, <2 x i64>* %addr
  ret void
}
2126
; Pointer advanced by an `add nsw` (no nuw) of 16: the constant is expected to
; stay in an explicit i32.add while the store keeps a zero offset. This is the
; store counterpart of load_v2i64_with_unfolded_offset and is deliberately
; distinct from the negative-gep test above.
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
  %q = ptrtoint <2 x i64>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x i64>*
  store <2 x i64> %v , <2 x i64>* %s
  ret void
}
2138
; Gep by one vector without `inbounds`: an explicit i32.add of 16 is expected
; and the store keeps a zero offset.
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
  %addr = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
  store <2 x i64> %v, <2 x i64>* %addr
  ret void
}
2150
; Store to the constant address 32: expected as offset 32 on a constant-zero
; base.
; CHECK-LABEL: store_v2i64_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
  %addr = inttoptr i32 32 to <2 x i64>*
  store <2 x i64> %v, <2 x i64>* %addr
  ret void
}
2161
; Store directly to the global @gv_v2i64 (defined elsewhere in this file). The
; symbol is expected to appear as the store's offset on top of a zero base.
; CHECK-LABEL: store_v2i64_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2i64_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v2i64($pop[[R]]), $0{{$}}
define void @store_v2i64_to_global_address(<2 x i64> %v) {
  store <2 x i64> %v, <2 x i64>* @gv_v2i64
  ret void
}
2171
2172; ==============================================================================
2173; 4 x float
2174; ==============================================================================
; Baseline: load a whole <4 x float> straight through the pointer argument.
; No address arithmetic is involved, so the expected immediate offset is 0.
; CHECK-LABEL: load_v4f32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32(<4 x float>* %p) {
  %vec = load <4 x float>, <4 x float>* %p
  ret <4 x float> %vec
}
2184
; Splat idiom: a scalar float is loaded, inserted into lane 0, and broadcast
; with an all-zero shuffle mask. Expected to select one v32x4.load_splat.
; CHECK-LABEL: load_splat_v4f32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4f32 (i32) -> (v128){{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_splat_v4f32(float* %p) {
  %elt = load float, float* %p
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
2196
; The address is the pointer plus 16, computed with an integer add marked
; `nuw`. The constant is expected to be folded into the load's immediate
; offset instead of being emitted as a separate add.
; CHECK-LABEL: load_v4f32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
  %base = ptrtoint <4 x float>* %p to i32
  %addr = add nuw i32 %base, 16
  %ptr = inttoptr i32 %addr to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %ptr
  ret <4 x float> %vec
}
2209
; Splat of a float loaded from pointer + 16, where the add is marked `nuw`.
; The constant is expected to be folded into the load_splat's immediate
; offset.
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
  %base = ptrtoint float* %p to i32
  %addr = add nuw i32 %base, 16
  %ptr = inttoptr i32 %addr to float*
  %elt = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
2224
; Load through an inbounds GEP of +1 vector. The resulting 16-byte
; displacement is expected to be folded into the load's immediate offset.
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
  %ptr = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  %vec = load <4 x float>, <4 x float>* %ptr
  ret <4 x float> %vec
}
2235
; Splat of a float loaded through an inbounds GEP of +1 element. The 4-byte
; displacement is expected to be folded into the load_splat's immediate
; offset.
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 4($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
  %ptr = getelementptr inbounds float, float* %p, i32 1
  %elt = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
2248
; Load through an inbounds GEP that steps back one vector (-16 bytes). The
; negative displacement is expected to remain an explicit i32.add, with the
; load's immediate offset left at 0.
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
  %ptr = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  %vec = load <4 x float>, <4 x float>* %ptr
  ret <4 x float> %vec
}
2261
; Splat of a float loaded through an inbounds GEP that steps back one element
; (-4 bytes). The negative displacement is expected to remain an explicit
; i32.add, with the load_splat's immediate offset left at 0.
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -4{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
  %ptr = getelementptr inbounds float, float* %p, i32 -1
  %elt = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
2276
; The address is the pointer plus 16, but the integer add carries only `nsw`
; (no `nuw`). The add is expected to remain explicit, with the load's
; immediate offset left at 0.
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
  %base = ptrtoint <4 x float>* %p to i32
  %addr = add nsw i32 %base, 16
  %ptr = inttoptr i32 %addr to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %ptr
  ret <4 x float> %vec
}
2291
; Splat of a float loaded from pointer + 16, where the integer add carries
; only `nsw` (no `nuw`). The add is expected to remain explicit, with the
; load_splat's immediate offset left at 0.
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
  %base = ptrtoint float* %p to i32
  %addr = add nsw i32 %base, 16
  %ptr = inttoptr i32 %addr to float*
  %elt = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
2308
; Load through a GEP of +1 vector that is NOT marked `inbounds`. The 16-byte
; displacement is expected to remain an explicit i32.add rather than becoming
; the load's immediate offset.
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
  %ptr = getelementptr <4 x float>, <4 x float>* %p, i32 1
  %vec = load <4 x float>, <4 x float>* %ptr
  ret <4 x float> %vec
}
2321
; Splat of a float loaded through a GEP of +1 element that is NOT marked
; `inbounds`. The 4-byte displacement is expected to remain an explicit
; i32.add, with the load_splat's immediate offset left at 0.
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 4{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
  %ptr = getelementptr float, float* %p, i32 1
  %elt = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
2336
; Load from the constant integer address 32. The constant is expected to
; appear as the load's immediate offset on top of a zero base.
; CHECK-LABEL: load_v4f32_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_v4f32_from_numeric_address() {
  %ptr = inttoptr i32 32 to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %ptr
  ret <4 x float> %vec
}
2348
; Splat of a float loaded from the constant integer address 32. The constant
; is expected to appear as the load_splat's immediate offset on a zero base.
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4f32_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x float> @load_splat_v4f32_from_numeric_address() {
  %ptr = inttoptr i32 32 to float*
  %elt = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
2362
; Load directly from the global @gv_v4f32, which is defined right here. The
; symbol is expected to appear as the load's offset on top of a zero base.
; CHECK-LABEL: load_v4f32_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v4f32_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4f32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x float> @load_v4f32_from_global_address() {
  %vec = load <4 x float>, <4 x float>* @gv_v4f32
  ret <4 x float> %vec
}
2374
; Splat of a float loaded from the scalar global @gv_f32, which is defined
; right here. The symbol is expected to appear as the load_splat's offset on
; top of a zero base.
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v4f32_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, gv_f32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_f32 = global float 42.
define <4 x float> @load_splat_v4f32_from_global_address() {
  %elt = load float, float* @gv_f32
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
2388
; Baseline: store a whole <4 x float> straight through the pointer argument.
; No address arithmetic is involved, so the expected immediate offset is 0.
; CHECK-LABEL: store_v4f32:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
  store <4 x float> %v, <4 x float>* %p
  ret void
}
2397
; The address is the pointer plus 16, computed with an integer add marked
; `nuw`. The constant is expected to be folded into the store's immediate
; offset instead of being emitted as a separate add.
; CHECK-LABEL: store_v4f32_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
  %base = ptrtoint <4 x float>* %p to i32
  %addr = add nuw i32 %base, 16
  %ptr = inttoptr i32 %addr to <4 x float>*
  store <4 x float> %v, <4 x float>* %ptr
  ret void
}
2409
; Store through an inbounds GEP of +1 vector. The resulting 16-byte
; displacement is expected to be folded into the store's immediate offset.
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
  %ptr = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v, <4 x float>* %ptr
  ret void
}
2419
; Store through an inbounds GEP that steps back one vector (-16 bytes). The
; negative displacement is expected to remain an explicit i32.add, leaving the
; store's immediate offset at 0.
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
  %ptr = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
  store <4 x float> %v, <4 x float>* %ptr
  ret void
}
2431
; Store to an address built by an integer add of 16 that carries only `nsw`
; (no `nuw`), using the same ptrtoint/add/inttoptr shape as
; load_v4f32_with_unfolded_offset. The add is expected to stay explicit, with
; the store's immediate offset left at 0. This is deliberately distinct from
; store_v4f32_with_unfolded_gep_negative_offset, which covers a negative GEP
; displacement instead.
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
  %q = ptrtoint <4 x float>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <4 x float>*
  store <4 x float> %v , <4 x float>* %s
  ret void
}
2443
; Store through a GEP of +1 vector that is NOT marked `inbounds`. The 16-byte
; displacement is expected to remain an explicit i32.add rather than becoming
; the store's immediate offset.
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
  %ptr = getelementptr <4 x float>, <4 x float>* %p, i32 1
  store <4 x float> %v, <4 x float>* %ptr
  ret void
}
2455
; Store to the constant integer address 32. The constant is expected to appear
; as the store's immediate offset on top of a zero base.
; CHECK-LABEL: store_v4f32_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
define void @store_v4f32_to_numeric_address(<4 x float> %v) {
  %ptr = inttoptr i32 32 to <4 x float>*
  store <4 x float> %v, <4 x float>* %ptr
  ret void
}
2466
; Store directly to the global @gv_v4f32 (defined with the matching load test
; in this file). The symbol is expected to appear as the store's offset on top
; of a zero base.
; CHECK-LABEL: store_v4f32_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v4f32_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v4f32($pop[[R]]), $0{{$}}
define void @store_v4f32_to_global_address(<4 x float> %v) {
  store <4 x float> %v, <4 x float>* @gv_v4f32
  ret void
}
2476
2477; ==============================================================================
2478; 2 x double
2479; ==============================================================================
; Baseline: load a whole <2 x double> straight through the pointer argument.
; No address arithmetic is involved, so the expected immediate offset is 0.
; CHECK-LABEL: load_v2f64:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2f64 (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64(<2 x double>* %p) {
  %vec = load <2 x double>, <2 x double>* %p
  ret <2 x double> %vec
}
2489
; Splat idiom: a scalar double is loaded, inserted into lane 0, and broadcast
; with an all-zero shuffle mask. Expected to select one v64x2.load_splat.
; CHECK-LABEL: load_splat_v2f64:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2f64 (i32) -> (v128){{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_splat_v2f64(double* %p) {
  %elt = load double, double* %p
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
2501
; The address is the pointer plus 16, computed with an integer add marked
; `nuw`. The constant is expected to be folded into the load's immediate
; offset instead of being emitted as a separate add.
; CHECK-LABEL: load_v2f64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
  %base = ptrtoint <2 x double>* %p to i32
  %addr = add nuw i32 %base, 16
  %ptr = inttoptr i32 %addr to <2 x double>*
  %vec = load <2 x double>, <2 x double>* %ptr
  ret <2 x double> %vec
}
2514
; Splat of a double loaded from pointer + 16, where the add is marked `nuw`.
; The constant is expected to be folded into the load_splat's immediate
; offset.
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
  %base = ptrtoint double* %p to i32
  %addr = add nuw i32 %base, 16
  %ptr = inttoptr i32 %addr to double*
  %elt = load double, double* %ptr
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
2529
; Load through an inbounds GEP of +1 vector. The resulting 16-byte
; displacement is expected to be folded into the load's immediate offset.
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
  %ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  %vec = load <2 x double>, <2 x double>* %ptr
  ret <2 x double> %vec
}
2540
; Splat of a double loaded through an inbounds GEP of +1 element. The 8-byte
; displacement is expected to be folded into the load_splat's immediate
; offset.
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 8($0){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
  %ptr = getelementptr inbounds double, double* %p, i32 1
  %elt = load double, double* %ptr
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
2553
; Load through an inbounds GEP that steps back one vector (-16 bytes). The
; negative displacement is expected to remain an explicit i32.add, with the
; load's immediate offset left at 0.
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
  %ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  %vec = load <2 x double>, <2 x double>* %ptr
  ret <2 x double> %vec
}
2566
; Splat of a double loaded through an inbounds GEP that steps back one element
; (-8 bytes). The negative displacement is expected to remain an explicit
; i32.add, with the load_splat's immediate offset left at 0.
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
  %ptr = getelementptr inbounds double, double* %p, i32 -1
  %elt = load double, double* %ptr
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
2581
; The address is the pointer plus 16, but the integer add carries only `nsw`
; (no `nuw`). The add is expected to remain explicit, with the load's
; immediate offset left at 0.
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
  %base = ptrtoint <2 x double>* %p to i32
  %addr = add nsw i32 %base, 16
  %ptr = inttoptr i32 %addr to <2 x double>*
  %vec = load <2 x double>, <2 x double>* %ptr
  ret <2 x double> %vec
}
2596
; Splat of a double loaded from pointer + 16, where the integer add carries
; only `nsw` (no `nuw`). The add is expected to remain explicit, with the
; load_splat's immediate offset left at 0.
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
  %base = ptrtoint double* %p to i32
  %addr = add nsw i32 %base, 16
  %ptr = inttoptr i32 %addr to double*
  %elt = load double, double* %ptr
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
2613
; Load through a GEP of +1 vector that is NOT marked `inbounds`. The 16-byte
; displacement is expected to remain an explicit i32.add rather than becoming
; the load's immediate offset.
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
  %ptr = getelementptr <2 x double>, <2 x double>* %p, i32 1
  %vec = load <2 x double>, <2 x double>* %ptr
  ret <2 x double> %vec
}
2626
; Splat of a double loaded through a GEP of +1 element that is NOT marked
; `inbounds`. The 8-byte displacement is expected to remain an explicit
; i32.add, with the load_splat's immediate offset left at 0.
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
  %ptr = getelementptr double, double* %p, i32 1
  %elt = load double, double* %ptr
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
2641
; Load from the constant integer address 32. The constant is expected to
; appear as the load's immediate offset on top of a zero base.
; CHECK-LABEL: load_v2f64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_v2f64_from_numeric_address() {
  %ptr = inttoptr i32 32 to <2 x double>*
  %vec = load <2 x double>, <2 x double>* %ptr
  ret <2 x double> %vec
}
2653
; Splat of a double loaded from the constant integer address 32. The constant
; is expected to appear as the load_splat's immediate offset on a zero base.
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2f64_from_numeric_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x double> @load_splat_v2f64_from_numeric_address() {
  %ptr = inttoptr i32 32 to double*
  %elt = load double, double* %ptr
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
2667
; Load directly from the global @gv_v2f64, which is defined right here. The
; symbol is expected to appear as the load's offset on top of a zero base.
; CHECK-LABEL: load_v2f64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_v2f64_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2f64($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
  %vec = load <2 x double>, <2 x double>* @gv_v2f64
  ret <2 x double> %vec
}
2679
; Splat of a double loaded from the scalar global @gv_f64, which is defined
; right here. The symbol is expected to appear as the load_splat's offset on
; top of a zero base.
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype load_splat_v2f64_from_global_address () -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, gv_f64($pop[[L0]]){{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
  %elt = load double, double* @gv_f64
  %lane0 = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
2693
; Baseline: store a whole <2 x double> straight through the pointer argument.
; No address arithmetic is involved, so the expected immediate offset is 0.
; CHECK-LABEL: store_v2f64:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2f64 (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 0($1), $0{{$}}
define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
  store <2 x double> %v, <2 x double>* %p
  ret void
}
2702
; The address is the pointer plus 16, computed with an integer add marked
; `nuw`. The constant is expected to be folded into the store's immediate
; offset instead of being emitted as a separate add.
; CHECK-LABEL: store_v2f64_with_folded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_with_folded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
  %base = ptrtoint <2 x double>* %p to i32
  %addr = add nuw i32 %base, 16
  %ptr = inttoptr i32 %addr to <2 x double>*
  store <2 x double> %v, <2 x double>* %ptr
  ret void
}
2714
; Store through an inbounds GEP of +1 vector. The resulting 16-byte
; displacement is expected to be folded into the store's immediate offset.
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: v128.store 16($1), $0{{$}}
define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
  %ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v, <2 x double>* %ptr
  ret void
}
2724
; Store through an inbounds GEP that steps back one vector (-16 bytes). The
; negative displacement is expected to remain an explicit i32.add, leaving the
; store's immediate offset at 0.
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
  %ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
  store <2 x double> %v, <2 x double>* %ptr
  ret void
}
2736
; Store to an address built by an integer add of 16 that carries only `nsw`
; (no `nuw`), using the same ptrtoint/add/inttoptr shape as
; load_v2f64_with_unfolded_offset. The add is expected to stay explicit, with
; the store's immediate offset left at 0. This is deliberately distinct from
; store_v2f64_with_unfolded_gep_negative_offset, which covers a negative GEP
; displacement instead.
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_with_unfolded_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
  %q = ptrtoint <2 x double>* %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to <2 x double>*
  store <2 x double> %v , <2 x double>* %s
  ret void
}
2748
; Store through a GEP of +1 vector that is NOT marked `inbounds`. The 16-byte
; displacement is expected to remain an explicit i32.add rather than becoming
; the store's immediate offset.
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
  %ptr = getelementptr <2 x double>, <2 x double>* %p, i32 1
  store <2 x double> %v, <2 x double>* %ptr
  ret void
}
2760
; Store to the constant integer address 32. The constant is expected to appear
; as the store's immediate offset on top of a zero base.
; CHECK-LABEL: store_v2f64_to_numeric_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_to_numeric_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
define void @store_v2f64_to_numeric_address(<2 x double> %v) {
  %ptr = inttoptr i32 32 to <2 x double>*
  store <2 x double> %v, <2 x double>* %ptr
  ret void
}
2771
; Store directly to the global @gv_v2f64 (defined with the matching load test
; in this file). The symbol is expected to appear as the store's offset on top
; of a zero base.
; CHECK-LABEL: store_v2f64_to_global_address:
; NO-SIMD128-NOT: v128
; SIMD128-NEXT: .functype store_v2f64_to_global_address (v128) -> (){{$}}
; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
; SIMD128-NEXT: v128.store gv_v2f64($pop[[R]]), $0{{$}}
define void @store_v2f64_to_global_address(<2 x double> %v) {
  store <2 x double> %v, <2 x double>* @gv_v2f64
  ret void
}
2781