1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
2; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
3; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128
4
5; Test SIMD loads and stores
6
7target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
8target triple = "wasm32-unknown-unknown"
9
10; ==============================================================================
11; 16 x i8
12; ==============================================================================
13; CHECK-LABEL: load_v16i8:
14; NO-SIMD128-NOT: v128
15; SIMD128-NEXT: .functype load_v16i8 (i32) -> (v128){{$}}
16; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
17; SIMD128-NEXT: return $pop[[R]]{{$}}
18define <16 x i8> @load_v16i8(<16 x i8>* %p) {
19  %v = load <16 x i8>, <16 x i8>* %p
20  ret <16 x i8> %v
21}
22
23; CHECK-LABEL: load_splat_v16i8:
24; SIMD128-VM-NOT: v8x16.load_splat
25; NO-SIMD128-NOT: v128
26; SIMD128-NEXT: .functype load_splat_v16i8 (i32) -> (v128){{$}}
27; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
28; SIMD128-NEXT: return $pop[[R]]{{$}}
29define <16 x i8> @load_splat_v16i8(i8* %p) {
30  %e = load i8, i8* %p
31  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
32  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
33  ret <16 x i8> %v2
34}
35
36; CHECK-LABEL: load_v16i8_with_folded_offset:
37; NO-SIMD128-NOT: v128
38; SIMD128-NEXT: .functype load_v16i8_with_folded_offset (i32) -> (v128){{$}}
39; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
40; SIMD128-NEXT: return $pop[[R]]{{$}}
41define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
42  %q = ptrtoint <16 x i8>* %p to i32
43  %r = add nuw i32 %q, 16
44  %s = inttoptr i32 %r to <16 x i8>*
45  %v = load <16 x i8>, <16 x i8>* %s
46  ret <16 x i8> %v
47}
48
49; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
50; NO-SIMD128-NOT: v128
51; SIMD128-NEXT: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128){{$}}
52; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
53; SIMD128-NEXT: return $pop[[R]]{{$}}
54define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
55  %q = ptrtoint i8* %p to i32
56  %r = add nuw i32 %q, 16
57  %s = inttoptr i32 %r to i8*
58  %e = load i8, i8* %s
59  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
60  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
61  ret <16 x i8> %v2
62}
63
64; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
65; NO-SIMD128-NOT: v128
66; SIMD128-NEXT: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128){{$}}
67; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
68; SIMD128-NEXT: return $pop[[R]]{{$}}
69define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
70  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
71  %v = load <16 x i8>, <16 x i8>* %s
72  ret <16 x i8> %v
73}
74
75; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
76; NO-SIMD128-NOT: v128
77; SIMD128-NEXT: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128){{$}}
78; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 1($0){{$}}
79; SIMD128-NEXT: return $pop[[R]]{{$}}
80define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
81  %s = getelementptr inbounds i8, i8* %p, i32 1
82  %e = load i8, i8* %s
83  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
84  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
85  ret <16 x i8> %v2
86}
87
88; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
89; NO-SIMD128-NOT: v128
90; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
91; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
92; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
93; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
94; SIMD128-NEXT: return $pop[[R]]{{$}}
95define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
96  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
97  %v = load <16 x i8>, <16 x i8>* %s
98  ret <16 x i8> %v
99}
100
101; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
102; NO-SIMD128-NOT: v128
103; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
104; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
105; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
106; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
107; SIMD128-NEXT: return $pop[[R]]{{$}}
108define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
109  %s = getelementptr inbounds i8, i8* %p, i32 -1
110  %e = load i8, i8* %s
111  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
112  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
113  ret <16 x i8> %v2
114}
115
116; CHECK-LABEL: load_v16i8_with_unfolded_offset:
117; NO-SIMD128-NOT: v128
118; SIMD128-NEXT: .functype load_v16i8_with_unfolded_offset (i32) -> (v128){{$}}
119; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
120; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
121; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
122; SIMD128-NEXT: return $pop[[R]]{{$}}
123define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
124  %q = ptrtoint <16 x i8>* %p to i32
125  %r = add nsw i32 %q, 16
126  %s = inttoptr i32 %r to <16 x i8>*
127  %v = load <16 x i8>, <16 x i8>* %s
128  ret <16 x i8> %v
129}
130
131; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
132; NO-SIMD128-NOT: v128
133; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128){{$}}
134; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
135; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
136; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
137; SIMD128-NEXT: return $pop[[R]]{{$}}
138define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
139  %q = ptrtoint i8* %p to i32
140  %r = add nsw i32 %q, 16
141  %s = inttoptr i32 %r to i8*
142  %e = load i8, i8* %s
143  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
144  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
145  ret <16 x i8> %v2
146}
147
148; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
149; NO-SIMD128-NOT: v128
150; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}}
151; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
152; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
153; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
154; SIMD128-NEXT: return $pop[[R]]{{$}}
155define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
156  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
157  %v = load <16 x i8>, <16 x i8>* %s
158  ret <16 x i8> %v
159}
160
161; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
162; NO-SIMD128-NOT: v128
163; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}}
164; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 1{{$}}
165; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
166; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
167; SIMD128-NEXT: return $pop[[R]]{{$}}
168define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
169  %s = getelementptr i8, i8* %p, i32 1
170  %e = load i8, i8* %s
171  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
172  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
173  ret <16 x i8> %v2
174}
175
176; CHECK-LABEL: load_v16i8_from_numeric_address:
177; NO-SIMD128-NOT: v128
178; SIMD128-NEXT: .functype load_v16i8_from_numeric_address () -> (v128){{$}}
179; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
180; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
181; SIMD128-NEXT: return $pop[[R]]{{$}}
182define <16 x i8> @load_v16i8_from_numeric_address() {
183  %s = inttoptr i32 32 to <16 x i8>*
184  %v = load <16 x i8>, <16 x i8>* %s
185  ret <16 x i8> %v
186}
187
188; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
189; NO-SIMD128-NOT: v128
190; SIMD128-NEXT: .functype load_splat_v16i8_from_numeric_address () -> (v128){{$}}
191; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
192; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
193; SIMD128-NEXT: return $pop[[R]]{{$}}
194define <16 x i8> @load_splat_v16i8_from_numeric_address() {
195  %s = inttoptr i32 32 to i8*
196  %e = load i8, i8* %s
197  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
198  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
199  ret <16 x i8> %v2
200}
201
202; CHECK-LABEL: load_v16i8_from_global_address:
203; NO-SIMD128-NOT: v128
204; SIMD128-NEXT: .functype load_v16i8_from_global_address () -> (v128){{$}}
205; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
206; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v16i8($pop[[L0]]){{$}}
207; SIMD128-NEXT: return $pop[[R]]{{$}}
208@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
209define <16 x i8> @load_v16i8_from_global_address() {
210  %v = load <16 x i8>, <16 x i8>* @gv_v16i8
211  ret <16 x i8> %v
212}
213
214; CHECK-LABEL: load_splat_v16i8_from_global_address:
215; NO-SIMD128-NOT: v128
216; SIMD128-NEXT: .functype load_splat_v16i8_from_global_address () -> (v128){{$}}
217; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
218; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, gv_i8($pop[[L0]]){{$}}
219; SIMD128-NEXT: return $pop[[R]]{{$}}
220@gv_i8 = global i8 42
221define <16 x i8> @load_splat_v16i8_from_global_address() {
222  %e = load i8, i8* @gv_i8
223  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
224  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
225  ret <16 x i8> %v2
226}
227
228; CHECK-LABEL: store_v16i8:
229; NO-SIMD128-NOT: v128
230; SIMD128-NEXT: .functype store_v16i8 (v128, i32) -> (){{$}}
231; SIMD128-NEXT: v128.store 0($1), $0{{$}}
232define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
233  store <16 x i8> %v , <16 x i8>* %p
234  ret void
235}
236
237; CHECK-LABEL: store_v16i8_with_folded_offset:
238; NO-SIMD128-NOT: v128
239; SIMD128-NEXT: .functype store_v16i8_with_folded_offset (v128, i32) -> (){{$}}
240; SIMD128-NEXT: v128.store 16($1), $0{{$}}
241define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
242  %q = ptrtoint <16 x i8>* %p to i32
243  %r = add nuw i32 %q, 16
244  %s = inttoptr i32 %r to <16 x i8>*
245  store <16 x i8> %v , <16 x i8>* %s
246  ret void
247}
248
249; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
250; NO-SIMD128-NOT: v128
251; SIMD128-NEXT: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> (){{$}}
252; SIMD128-NEXT: v128.store 16($1), $0{{$}}
253define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
254  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
255  store <16 x i8> %v , <16 x i8>* %s
256  ret void
257}
258
259; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
260; NO-SIMD128-NOT: v128
261; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
262; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
263; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
264; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
265define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
266  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
267  store <16 x i8> %v , <16 x i8>* %s
268  ret void
269}
270
271; CHECK-LABEL: store_v16i8_with_unfolded_offset:
272; NO-SIMD128-NOT: v128
273; SIMD128-NEXT: .functype store_v16i8_with_unfolded_offset (v128, i32) -> (){{$}}
274; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
275; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
276; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
277define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
278  %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
279  store <16 x i8> %v , <16 x i8>* %s
280  ret void
281}
282
283; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
284; NO-SIMD128-NOT: v128
285; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> (){{$}}
286; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
287; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
288; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
289define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
290  %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
291  store <16 x i8> %v , <16 x i8>* %s
292  ret void
293}
294
295; CHECK-LABEL: store_v16i8_to_numeric_address:
296; NO-SIMD128-NOT: v128
297; SIMD128-NEXT: .functype store_v16i8_to_numeric_address (v128) -> (){{$}}
298; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
299; SIMD128-NEXT: v128.store 32($pop[[R]]), $0{{$}}
300define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
301  %s = inttoptr i32 32 to <16 x i8>*
302  store <16 x i8> %v , <16 x i8>* %s
303  ret void
304}
305
306; CHECK-LABEL: store_v16i8_to_global_address:
307; NO-SIMD128-NOT: v128
308; SIMD128-NEXT: .functype store_v16i8_to_global_address (v128) -> (){{$}}
309; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
310; SIMD128-NEXT: v128.store gv_v16i8($pop[[R]]), $0{{$}}
311define void @store_v16i8_to_global_address(<16 x i8> %v) {
312  store <16 x i8> %v , <16 x i8>* @gv_v16i8
313  ret void
314}
315
316; ==============================================================================
317; 8 x i16
318; ==============================================================================
319; CHECK-LABEL: load_v8i16:
320; NO-SIMD128-NOT: v128
321; SIMD128-NEXT: .functype load_v8i16 (i32) -> (v128){{$}}
322; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
323; SIMD128-NEXT: return $pop[[R]]{{$}}
324define <8 x i16> @load_v8i16(<8 x i16>* %p) {
325  %v = load <8 x i16>, <8 x i16>* %p
326  ret <8 x i16> %v
327}
328
329; CHECK-LABEL: load_splat_v8i16:
330; NO-SIMD128-NOT: v128
331; SIMD128-NEXT: .functype load_splat_v8i16 (i32) -> (v128){{$}}
332; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
333; SIMD128-NEXT: return $pop[[R]]{{$}}
334define <8 x i16> @load_splat_v8i16(i16* %p) {
335  %e = load i16, i16* %p
336  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
337  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
338  ret <8 x i16> %v2
339}
340
341; CHECK-LABEL: load_sext_v8i16:
342; NO-SIMD128-NOT: v128
343; SIMD128-NEXT: .functype load_sext_v8i16 (i32) -> (v128){{$}}
344; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($0){{$}}
345; SIMD128-NEXT: return $pop[[R]]{{$}}
346define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) {
347  %v = load <8 x i8>, <8 x i8>* %p
348  %v2 = sext <8 x i8> %v to <8 x i16>
349  ret <8 x i16> %v2
350}
351
352; CHECK-LABEL: load_zext_v8i16:
353; NO-SIMD128-NOT: v128
354; SIMD128-NEXT: .functype load_zext_v8i16 (i32) -> (v128){{$}}
355; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($0){{$}}
356; SIMD128-NEXT: return $pop[[R]]{{$}}
357define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) {
358  %v = load <8 x i8>, <8 x i8>* %p
359  %v2 = zext <8 x i8> %v to <8 x i16>
360  ret <8 x i16> %v2
361}
362
363; CHECK-LABEL: load_ext_v8i16:
364; NO-SIMD128-NOT: load8x8
365; SIMD128-NEXT: .functype load_ext_v8i16 (i32) -> (v128){{$}}
366; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($0){{$}}
367; SIMD128-NEXT: return $pop[[R]]{{$}}
368define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) {
369  %v = load <8 x i8>, <8 x i8>* %p
370  ret <8 x i8> %v
371}
372
373; CHECK-LABEL: load_v8i16_with_folded_offset:
374; NO-SIMD128-NOT: v128
375; SIMD128-NEXT: .functype load_v8i16_with_folded_offset (i32) -> (v128){{$}}
376; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
377; SIMD128-NEXT: return $pop[[R]]{{$}}
378define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
379  %q = ptrtoint <8 x i16>* %p to i32
380  %r = add nuw i32 %q, 16
381  %s = inttoptr i32 %r to <8 x i16>*
382  %v = load <8 x i16>, <8 x i16>* %s
383  ret <8 x i16> %v
384}
385
386; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
387; NO-SIMD128-NOT: v128
388; SIMD128-NEXT: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128){{$}}
389; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
390; SIMD128-NEXT: return $pop[[R]]{{$}}
391define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
392  %q = ptrtoint i16* %p to i32
393  %r = add nuw i32 %q, 16
394  %s = inttoptr i32 %r to i16*
395  %e = load i16, i16* %s
396  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
397  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
398  ret <8 x i16> %v2
399}
400
401; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
402; NO-SIMD128-NOT: v128
403; SIMD128-NEXT: .functype load_sext_v8i16_with_folded_offset (i32) -> (v128){{$}}
404; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 16($0){{$}}
405; SIMD128-NEXT: return $pop[[R]]{{$}}
406define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) {
407  %q = ptrtoint <8 x i8>* %p to i32
408  %r = add nuw i32 %q, 16
409  %s = inttoptr i32 %r to <8 x i8>*
410  %v = load <8 x i8>, <8 x i8>* %s
411  %v2 = sext <8 x i8> %v to <8 x i16>
412  ret <8 x i16> %v2
413}
414
415; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
416; NO-SIMD128-NOT: v128
417; SIMD128-NEXT: .functype load_zext_v8i16_with_folded_offset (i32) -> (v128){{$}}
418; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 16($0){{$}}
419; SIMD128-NEXT: return $pop[[R]]{{$}}
420define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) {
421  %q = ptrtoint <8 x i8>* %p to i32
422  %r = add nuw i32 %q, 16
423  %s = inttoptr i32 %r to <8 x i8>*
424  %v = load <8 x i8>, <8 x i8>* %s
425  %v2 = zext <8 x i8> %v to <8 x i16>
426  ret <8 x i16> %v2
427}
428
429; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
430; NO-SIMD128-NOT: load8x8
431; SIMD128-NEXT: .functype load_ext_v8i16_with_folded_offset (i32) -> (v128){{$}}
432; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 16($0){{$}}
433; SIMD128-NEXT: return $pop[[R]]{{$}}
434define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) {
435  %q = ptrtoint <8 x i8>* %p to i32
436  %r = add nuw i32 %q, 16
437  %s = inttoptr i32 %r to <8 x i8>*
438  %v = load <8 x i8>, <8 x i8>* %s
439  ret <8 x i8> %v
440}
441
442; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
443; NO-SIMD128-NOT: v128
444; SIMD128-NEXT: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
445; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
446; SIMD128-NEXT: return $pop[[R]]{{$}}
447define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
448  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
449  %v = load <8 x i16>, <8 x i16>* %s
450  ret <8 x i16> %v
451}
452
453; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
454; NO-SIMD128-NOT: v128
455; SIMD128-NEXT: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
456; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 2($0){{$}}
457; SIMD128-NEXT: return $pop[[R]]{{$}}
458define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
459  %s = getelementptr inbounds i16, i16* %p, i32 1
460  %e = load i16, i16* %s
461  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
462  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
463  ret <8 x i16> %v2
464}
465
466; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
467; NO-SIMD128-NOT: v128
468; SIMD128-NEXT: .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
469; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 8($0){{$}}
470; SIMD128-NEXT: return $pop[[R]]{{$}}
471define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
472  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
473  %v = load <8 x i8>, <8 x i8>* %s
474  %v2 = sext <8 x i8> %v to <8 x i16>
475  ret <8 x i16> %v2
476}
477
478; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
479; NO-SIMD128-NOT: v128
480; SIMD128-NEXT: .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
481; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 8($0){{$}}
482; SIMD128-NEXT: return $pop[[R]]{{$}}
483define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
484  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
485  %v = load <8 x i8>, <8 x i8>* %s
486  %v2 = zext <8 x i8> %v to <8 x i16>
487  ret <8 x i16> %v2
488}
489
490; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
491; NO-SIMD128-NOT: load8x8
492; SIMD128-NEXT: .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
493; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 8($0){{$}}
494; SIMD128-NEXT: return $pop[[R]]{{$}}
495define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) {
496  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1
497  %v = load <8 x i8>, <8 x i8>* %s
498  ret <8 x i8> %v
499}
500
501; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
502; NO-SIMD128-NOT: v128
503; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
504; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
505; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
506; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
507; SIMD128-NEXT: return $pop[[R]]{{$}}
508define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
509  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
510  %v = load <8 x i16>, <8 x i16>* %s
511  ret <8 x i16> %v
512}
513
514; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
515; NO-SIMD128-NOT: v128
516; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
517; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -2{{$}}
518; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
519; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
520; SIMD128-NEXT: return $pop[[R]]{{$}}
521define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
522  %s = getelementptr inbounds i16, i16* %p, i32 -1
523  %e = load i16, i16* %s
524  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
525  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
526  ret <8 x i16> %v2
527}
528
529; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
530; NO-SIMD128-NOT: v128
531; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
532; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
533; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
534; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
535; SIMD128-NEXT: return $pop[[R]]{{$}}
536define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
537  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
538  %v = load <8 x i8>, <8 x i8>* %s
539  %v2 = sext <8 x i8> %v to <8 x i16>
540  ret <8 x i16> %v2
541}
542
543; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
544; NO-SIMD128-NOT: v128
545; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
546; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
547; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
548; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
549; SIMD128-NEXT: return $pop[[R]]{{$}}
550define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
551  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
552  %v = load <8 x i8>, <8 x i8>* %s
553  %v2 = zext <8 x i8> %v to <8 x i16>
554  ret <8 x i16> %v2
555}
556
557; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
558; NO-SIMD128-NOT: load8x8
559; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
560; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
561; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
562; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
563; SIMD128-NEXT: return $pop[[R]]{{$}}
564define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) {
565  %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1
566  %v = load <8 x i8>, <8 x i8>* %s
567  ret <8 x i8> %v
568}
569
570; CHECK-LABEL: load_v8i16_with_unfolded_offset:
571; NO-SIMD128-NOT: v128
572; SIMD128-NEXT: .functype load_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
573; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
574; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
575; SIMD128-NEXT: v128.load $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}}
576; SIMD128-NEXT: return $pop[[L0]]{{$}}
577define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
578  %q = ptrtoint <8 x i16>* %p to i32
579  %r = add nsw i32 %q, 16
580  %s = inttoptr i32 %r to <8 x i16>*
581  %v = load <8 x i16>, <8 x i16>* %s
582  ret <8 x i16> %v
583}
584
585; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
586; NO-SIMD128-NOT: v128
587; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
588; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
589; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
590; SIMD128-NEXT: v16x8.load_splat $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}}
591; SIMD128-NEXT: return $pop[[L0]]{{$}}
592define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
593  %q = ptrtoint i16* %p to i32
594  %r = add nsw i32 %q, 16
595  %s = inttoptr i32 %r to i16*
596  %e = load i16, i16* %s
597  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
598  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
599  ret <8 x i16> %v2
600}
601
602; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
603; NO-SIMD128-NOT: v128
604; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
605; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
606; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
607; SIMD128-NEXT: i16x8.load8x8_s $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}}
608; SIMD128-NEXT: return $pop[[L0]]{{$}}
609define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
610  %q = ptrtoint <8 x i8>* %p to i32
611  %r = add nsw i32 %q, 16
612  %s = inttoptr i32 %r to <8 x i8>*
613  %v = load <8 x i8>, <8 x i8>* %s
614  %v2 = sext <8 x i8> %v to <8 x i16>
615  ret <8 x i16> %v2
616}
617
618; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
619; NO-SIMD128-NOT: v128
620; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
621; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
622; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
623; SIMD128-NEXT: i16x8.load8x8_u $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}}
624; SIMD128-NEXT: return $pop[[L0]]{{$}}
625define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
626  %q = ptrtoint <8 x i8>* %p to i32
627  %r = add nsw i32 %q, 16
628  %s = inttoptr i32 %r to <8 x i8>*
629  %v = load <8 x i8>, <8 x i8>* %s
630  %v2 = zext <8 x i8> %v to <8 x i16>
631  ret <8 x i16> %v2
632}
633
634; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
635; NO-SIMD128-NOT: load8x8
636; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
637; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
638; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
639; SIMD128-NEXT: i16x8.load8x8_u $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}}
640; SIMD128-NEXT: return $pop[[L0]]{{$}}
641define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) {
642  %q = ptrtoint <8 x i8>* %p to i32
643  %r = add nsw i32 %q, 16
644  %s = inttoptr i32 %r to <8 x i8>*
645  %v = load <8 x i8>, <8 x i8>* %s
646  ret <8 x i8> %v
647}
648
649; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
650; NO-SIMD128-NOT: v128
651; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
652; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
653; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
654; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
655; SIMD128-NEXT: return $pop[[R]]{{$}}
656define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
657  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
658  %v = load <8 x i16>, <8 x i16>* %s
659  ret <8 x i16> %v
660}
661
662; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
663; NO-SIMD128-NOT: v128
664; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
665; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 2{{$}}
666; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
667; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
668; SIMD128-NEXT: return $pop[[R]]{{$}}
669define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
670  %s = getelementptr i16, i16* %p, i32 1
671  %e = load i16, i16* %s
672  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
673  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
674  ret <8 x i16> %v2
675}
676
677; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
678; NO-SIMD128-NOT: v128
679; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
680; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
681; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
682; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
683; SIMD128-NEXT: return $pop[[R]]{{$}}
684define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
685  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
686  %v = load <8 x i8>, <8 x i8>* %s
687  %v2 = sext <8 x i8> %v to <8 x i16>
688  ret <8 x i16> %v2
689}
690
691; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
692; NO-SIMD128-NOT: v128
693; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
694; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
695; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
696; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
697; SIMD128-NEXT: return $pop[[R]]{{$}}
698define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
699  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
700  %v = load <8 x i8>, <8 x i8>* %s
701  %v2 = zext <8 x i8> %v to <8 x i16>
702  ret <8 x i16> %v2
703}
704
705; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
706; NO-SIMD128-NOT: load8x8
707; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
708; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
709; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
710; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
711; SIMD128-NEXT: return $pop[[R]]{{$}}
712define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) {
713  %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1
714  %v = load <8 x i8>, <8 x i8>* %s
715  ret <8 x i8> %v
716}
717
718; CHECK-LABEL: load_v8i16_from_numeric_address:
719; NO-SIMD128-NOT: v128
720; SIMD128-NEXT: .functype load_v8i16_from_numeric_address () -> (v128){{$}}
721; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
722; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
723; SIMD128-NEXT: return $pop[[R]]{{$}}
724define <8 x i16> @load_v8i16_from_numeric_address() {
725  %s = inttoptr i32 32 to <8 x i16>*
726  %v = load <8 x i16>, <8 x i16>* %s
727  ret <8 x i16> %v
728}
729
730; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
731; NO-SIMD128-NOT: v128
732; SIMD128-NEXT: .functype load_splat_v8i16_from_numeric_address () -> (v128){{$}}
733; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
734; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
735; SIMD128-NEXT: return $pop[[R]]{{$}}
736define <8 x i16> @load_splat_v8i16_from_numeric_address() {
737  %s = inttoptr i32 32 to i16*
738  %e = load i16, i16* %s
739  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
740  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
741  ret <8 x i16> %v2
742}
743
744; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
745; NO-SIMD128-NOT: v128
746; SIMD128-NEXT: .functype load_sext_v8i16_from_numeric_address () -> (v128){{$}}
747; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
748; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
749; SIMD128-NEXT: return $pop[[R]]{{$}}
750define <8 x i16> @load_sext_v8i16_from_numeric_address() {
751  %s = inttoptr i32 32 to <8 x i8>*
752  %v = load <8 x i8>, <8 x i8>* %s
753  %v2 = sext <8 x i8> %v to <8 x i16>
754  ret <8 x i16> %v2
755}
756
757; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
758; NO-SIMD128-NOT: v128
759; SIMD128-NEXT: .functype load_zext_v8i16_from_numeric_address () -> (v128){{$}}
760; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
761; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
762; SIMD128-NEXT: return $pop[[R]]{{$}}
763define <8 x i16> @load_zext_v8i16_from_numeric_address() {
764  %s = inttoptr i32 32 to <8 x i8>*
765  %v = load <8 x i8>, <8 x i8>* %s
766  %v2 = zext <8 x i8> %v to <8 x i16>
767  ret <8 x i16> %v2
768}
769
770; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
771; NO-SIMD128-NOT: load8x8
772; SIMD128-NEXT: .functype load_ext_v8i16_from_numeric_address () -> (v128){{$}}
773; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
774; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
775; SIMD128-NEXT: return $pop[[R]]{{$}}
776define <8 x i8> @load_ext_v8i16_from_numeric_address() {
777  %s = inttoptr i32 32 to <8 x i8>*
778  %v = load <8 x i8>, <8 x i8>* %s
779  ret <8 x i8> %v
780}
781
782; CHECK-LABEL: load_v8i16_from_global_address:
783; NO-SIMD128-NOT: v128
784; SIMD128-NEXT: .functype load_v8i16_from_global_address () -> (v128){{$}}
785; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
786; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v8i16($pop[[L0]]){{$}}
787; SIMD128-NEXT: return $pop[[R]]{{$}}
788@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
789define <8 x i16> @load_v8i16_from_global_address() {
790  %v = load <8 x i16>, <8 x i16>* @gv_v8i16
791  ret <8 x i16> %v
792}
793
794; CHECK-LABEL: load_splat_v8i16_from_global_address:
795; NO-SIMD128-NOT: v128
796; SIMD128-NEXT: .functype load_splat_v8i16_from_global_address () -> (v128){{$}}
797; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
798; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, gv_i16($pop[[L0]]){{$}}
799; SIMD128-NEXT: return $pop[[R]]{{$}}
800@gv_i16 = global i16 42
801define <8 x i16> @load_splat_v8i16_from_global_address() {
802  %e = load i16, i16* @gv_i16
803  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
804  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
805  ret <8 x i16> %v2
806}
807
808; CHECK-LABEL: load_sext_v8i16_from_global_address:
809; NO-SIMD128-NOT: v128
810; SIMD128-NEXT: .functype load_sext_v8i16_from_global_address () -> (v128){{$}}
811; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
812; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}}
813; SIMD128-NEXT: return $pop[[R]]{{$}}
814@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
815define <8 x i16> @load_sext_v8i16_from_global_address() {
816  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
817  %v2 = sext <8 x i8> %v to <8 x i16>
818  ret <8 x i16> %v2
819}
820
821; CHECK-LABEL: load_zext_v8i16_from_global_address:
822; NO-SIMD128-NOT: v128
823; SIMD128-NEXT: .functype load_zext_v8i16_from_global_address () -> (v128){{$}}
824; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
825; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}}
826; SIMD128-NEXT: return $pop[[R]]{{$}}
827define <8 x i16> @load_zext_v8i16_from_global_address() {
828  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
829  %v2 = zext <8 x i8> %v to <8 x i16>
830  ret <8 x i16> %v2
831}
832
833; CHECK-LABEL: load_ext_v8i16_from_global_address:
834; NO-SIMD128-NOT: load8x8
835; SIMD128-NEXT: .functype load_ext_v8i16_from_global_address () -> (v128){{$}}
836; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
837; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}}
838; SIMD128-NEXT: return $pop[[R]]{{$}}
839define <8 x i8> @load_ext_v8i16_from_global_address() {
840  %v = load <8 x i8>, <8 x i8>* @gv_v8i8
841  ret <8 x i8> %v
842}
843
844
845; CHECK-LABEL: store_v8i16:
846; NO-SIMD128-NOT: v128
847; SIMD128-NEXT: .functype store_v8i16 (v128, i32) -> (){{$}}
848; SIMD128-NEXT: v128.store 0($1), $0{{$}}
849define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
850  store <8 x i16> %v , <8 x i16>* %p
851  ret void
852}
853
854; CHECK-LABEL: store_v8i16_with_folded_offset:
855; NO-SIMD128-NOT: v128
856; SIMD128-NEXT: .functype store_v8i16_with_folded_offset (v128, i32) -> (){{$}}
857; SIMD128-NEXT: v128.store 16($1), $0{{$}}
858define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
859  %q = ptrtoint <8 x i16>* %p to i32
860  %r = add nuw i32 %q, 16
861  %s = inttoptr i32 %r to <8 x i16>*
862  store <8 x i16> %v , <8 x i16>* %s
863  ret void
864}
865
866; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
867; NO-SIMD128-NOT: v128
868; SIMD128-NEXT: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> (){{$}}
869; SIMD128-NEXT: v128.store 16($1), $0{{$}}
870define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
871  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
872  store <8 x i16> %v , <8 x i16>* %s
873  ret void
874}
875
876; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
877; NO-SIMD128-NOT: v128
878; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
879; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
880; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
881; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
882define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
883  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
884  store <8 x i16> %v , <8 x i16>* %s
885  ret void
886}
887
888; CHECK-LABEL: store_v8i16_with_unfolded_offset:
889; NO-SIMD128-NOT: v128
890; SIMD128-NEXT: .functype store_v8i16_with_unfolded_offset (v128, i32) -> (){{$}}
891; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
892; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
893; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
894define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
895  %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
896  store <8 x i16> %v , <8 x i16>* %s
897  ret void
898}
899
900; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
901; NO-SIMD128-NOT: v128
902; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> (){{$}}
903; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
904; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
905; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
906define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
907  %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
908  store <8 x i16> %v , <8 x i16>* %s
909  ret void
910}
911
912; CHECK-LABEL: store_v8i16_to_numeric_address:
913; NO-SIMD128-NOT: v128
914; SIMD128-NEXT: .functype store_v8i16_to_numeric_address (v128) -> (){{$}}
915; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
916; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
917define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
918  %s = inttoptr i32 32 to <8 x i16>*
919  store <8 x i16> %v , <8 x i16>* %s
920  ret void
921}
922
923; CHECK-LABEL: store_v8i16_to_global_address:
924; NO-SIMD128-NOT: v128
925; SIMD128-NEXT: .functype store_v8i16_to_global_address (v128) -> (){{$}}
926; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
927; SIMD128-NEXT: v128.store gv_v8i16($pop[[R]]), $0{{$}}
928define void @store_v8i16_to_global_address(<8 x i16> %v) {
929  store <8 x i16> %v , <8 x i16>* @gv_v8i16
930  ret void
931}
932
933; ==============================================================================
934; 4 x i32
935; ==============================================================================
936; CHECK-LABEL: load_v4i32:
937; NO-SIMD128-NOT: v128
938; SIMD128-NEXT: .functype load_v4i32 (i32) -> (v128){{$}}
939; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
940; SIMD128-NEXT: return $pop[[R]]{{$}}
941define <4 x i32> @load_v4i32(<4 x i32>* %p) {
942  %v = load <4 x i32>, <4 x i32>* %p
943  ret <4 x i32> %v
944}
945
946; CHECK-LABEL: load_splat_v4i32:
947; NO-SIMD128-NOT: v128
948; SIMD128-NEXT: .functype load_splat_v4i32 (i32) -> (v128){{$}}
949; SIMD128-NEXT: v32x4.load_splat
950define <4 x i32> @load_splat_v4i32(i32* %addr) {
951  %e = load i32, i32* %addr, align 4
952  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
953  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
954  ret <4 x i32> %v2
955}
956
957; CHECK-LABEL: load_sext_v4i32:
958; NO-SIMD128-NOT: v128
959; SIMD128-NEXT: .functype load_sext_v4i32 (i32) -> (v128){{$}}
960; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($0){{$}}
961; SIMD128-NEXT: return $pop[[R]]{{$}}
962define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) {
963  %v = load <4 x i16>, <4 x i16>* %p
964  %v2 = sext <4 x i16> %v to <4 x i32>
965  ret <4 x i32> %v2
966}
967
968; CHECK-LABEL: load_zext_v4i32:
969; NO-SIMD128-NOT: v128
970; SIMD128-NEXT: .functype load_zext_v4i32 (i32) -> (v128){{$}}
971; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($0){{$}}
972; SIMD128-NEXT: return $pop[[R]]{{$}}
973define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) {
974  %v = load <4 x i16>, <4 x i16>* %p
975  %v2 = zext <4 x i16> %v to <4 x i32>
976  ret <4 x i32> %v2
977}
978
979; CHECK-LABEL: load_ext_v4i32:
980; NO-SIMD128-NOT: load16x4
981; SIMD128-NEXT: .functype load_ext_v4i32 (i32) -> (v128){{$}}
982; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($0){{$}}
983; SIMD128-NEXT: return $pop[[R]]{{$}}
984define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) {
985  %v = load <4 x i16>, <4 x i16>* %p
986  ret <4 x i16> %v
987}
988
989; CHECK-LABEL: load_v4i32_with_folded_offset:
990; NO-SIMD128-NOT: v128
991; SIMD128-NEXT: .functype load_v4i32_with_folded_offset (i32) -> (v128){{$}}
992; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
993; SIMD128-NEXT: return $pop[[R]]{{$}}
994define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
995  %q = ptrtoint <4 x i32>* %p to i32
996  %r = add nuw i32 %q, 16
997  %s = inttoptr i32 %r to <4 x i32>*
998  %v = load <4 x i32>, <4 x i32>* %s
999  ret <4 x i32> %v
1000}
1001
1002; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
1003; NO-SIMD128-NOT: v128
1004; SIMD128-NEXT: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128){{$}}
1005; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
1006; SIMD128-NEXT: return $pop[[R]]{{$}}
1007define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
1008  %q = ptrtoint i32* %p to i32
1009  %r = add nuw i32 %q, 16
1010  %s = inttoptr i32 %r to i32*
1011  %e = load i32, i32* %s
1012  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1013  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1014  ret <4 x i32> %v2
1015}
1016
1017; CHECK-LABEL: load_sext_v4i32_with_folded_offset:
1018; NO-SIMD128-NOT: v128
1019; SIMD128-NEXT: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128){{$}}
1020; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 16($0){{$}}
1021; SIMD128-NEXT: return $pop[[R]]{{$}}
1022define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) {
1023  %q = ptrtoint <4 x i16>* %p to i32
1024  %r = add nuw i32 %q, 16
1025  %s = inttoptr i32 %r to <4 x i16>*
1026  %v = load <4 x i16>, <4 x i16>* %s
1027  %v2 = sext <4 x i16> %v to <4 x i32>
1028  ret <4 x i32> %v2
1029}
1030
1031; CHECK-LABEL: load_zext_v4i32_with_folded_offset:
1032; NO-SIMD128-NOT: v128
1033; SIMD128-NEXT: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128){{$}}
1034; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 16($0){{$}}
1035; SIMD128-NEXT: return $pop[[R]]{{$}}
1036define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) {
1037  %q = ptrtoint <4 x i16>* %p to i32
1038  %r = add nuw i32 %q, 16
1039  %s = inttoptr i32 %r to <4 x i16>*
1040  %v = load <4 x i16>, <4 x i16>* %s
1041  %v2 = zext <4 x i16> %v to <4 x i32>
1042  ret <4 x i32> %v2
1043}
1044
1045; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
1046; NO-SIMD128-NOT: load16x4
1047; SIMD128-NEXT: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128){{$}}
1048; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 16($0){{$}}
1049; SIMD128-NEXT: return $pop[[R]]{{$}}
1050define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) {
1051  %q = ptrtoint <4 x i16>* %p to i32
1052  %r = add nuw i32 %q, 16
1053  %s = inttoptr i32 %r to <4 x i16>*
1054  %v = load <4 x i16>, <4 x i16>* %s
1055  ret <4 x i16> %v
1056}
1057
1058; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
1059; NO-SIMD128-NOT: v128
1060; SIMD128-NEXT: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
1061; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
1062; SIMD128-NEXT: return $pop[[R]]{{$}}
1063define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
1064  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
1065  %v = load <4 x i32>, <4 x i32>* %s
1066  ret <4 x i32> %v
1067}
1068
1069; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
1070; NO-SIMD128-NOT: v128
1071; SIMD128-NEXT: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
1072; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 4($0){{$}}
1073; SIMD128-NEXT: return $pop[[R]]{{$}}
1074define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
1075  %s = getelementptr inbounds i32, i32* %p, i32 1
1076  %e = load i32, i32* %s
1077  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1078  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1079  ret <4 x i32> %v2
1080}
1081
1082; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset:
1083; NO-SIMD128-NOT: v128
1084; SIMD128-NEXT: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
1085; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 8($0){{$}}
1086; SIMD128-NEXT: return $pop[[R]]{{$}}
1087define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1088  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1089  %v = load <4 x i16>, <4 x i16>* %s
1090  %v2 = sext <4 x i16> %v to <4 x i32>
1091  ret <4 x i32> %v2
1092}
1093
1094; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset:
1095; NO-SIMD128-NOT: v128
1096; SIMD128-NEXT: .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
1097; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 8($0){{$}}
1098; SIMD128-NEXT: return $pop[[R]]{{$}}
1099define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1100  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1101  %v = load <4 x i16>, <4 x i16>* %s
1102  %v2 = zext <4 x i16> %v to <4 x i32>
1103  ret <4 x i32> %v2
1104}
1105
1106; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
1107; NO-SIMD128-NOT: load16x4
1108; SIMD128-NEXT: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
1109; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 8($0){{$}}
1110; SIMD128-NEXT: return $pop[[R]]{{$}}
1111define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) {
1112  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1
1113  %v = load <4 x i16>, <4 x i16>* %s
1114  ret <4 x i16> %v
1115}
1116
1117; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
1118; NO-SIMD128-NOT: v128
1119; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1120; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1121; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1122; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1123; SIMD128-NEXT: return $pop[[R]]{{$}}
1124define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
1125  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1126  %v = load <4 x i32>, <4 x i32>* %s
1127  ret <4 x i32> %v
1128}
1129
1130; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
1131; NO-SIMD128-NOT: v128
1132; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1133; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -4{{$}}
1134; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1135; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1136; SIMD128-NEXT: return $pop[[R]]{{$}}
1137define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
1138  %s = getelementptr inbounds i32, i32* %p, i32 -1
1139  %e = load i32, i32* %s
1140  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1141  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1142  ret <4 x i32> %v2
1143}
1144
1145; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset:
1146; NO-SIMD128-NOT: v128
1147; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1148; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1149; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1150; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1151; SIMD128-NEXT: return $pop[[R]]{{$}}
1152define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1153  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1154  %v = load <4 x i16>, <4 x i16>* %s
1155  %v2 = sext <4 x i16> %v to <4 x i32>
1156  ret <4 x i32> %v2
1157}
1158
1159; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset:
1160; NO-SIMD128-NOT: v128
1161; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1162; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1163; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1164; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1165; SIMD128-NEXT: return $pop[[R]]{{$}}
1166define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1167  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1168  %v = load <4 x i16>, <4 x i16>* %s
1169  %v2 = zext <4 x i16> %v to <4 x i32>
1170  ret <4 x i32> %v2
1171}
1172
1173; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
1174; NO-SIMD128-NOT: load16x4
1175; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1176; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1177; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1178; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1179; SIMD128-NEXT: return $pop[[R]]{{$}}
1180define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) {
1181  %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1
1182  %v = load <4 x i16>, <4 x i16>* %s
1183  ret <4 x i16> %v
1184}
1185
1186; CHECK-LABEL: load_v4i32_with_unfolded_offset:
1187; NO-SIMD128-NOT: v128
1188; SIMD128-NEXT: .functype load_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
1189; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1190; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1191; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1192; SIMD128-NEXT: return $pop[[R]]{{$}}
1193define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
1194  %q = ptrtoint <4 x i32>* %p to i32
1195  %r = add nsw i32 %q, 16
1196  %s = inttoptr i32 %r to <4 x i32>*
1197  %v = load <4 x i32>, <4 x i32>* %s
1198  ret <4 x i32> %v
1199}
1200
1201; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
1202; NO-SIMD128-NOT: v128
1203; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
1204; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1205; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1206; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1207; SIMD128-NEXT: return $pop[[R]]{{$}}
1208define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
1209  %q = ptrtoint i32* %p to i32
1210  %r = add nsw i32 %q, 16
1211  %s = inttoptr i32 %r to i32*
1212  %e = load i32, i32* %s
1213  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1214  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1215  ret <4 x i32> %v2
1216}
1217
1218; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset:
1219; NO-SIMD128-NOT: v128
1220; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
1221; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1222; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1223; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1224; SIMD128-NEXT: return $pop[[R]]{{$}}
1225define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1226  %q = ptrtoint <4 x i16>* %p to i32
1227  %r = add nsw i32 %q, 16
1228  %s = inttoptr i32 %r to <4 x i16>*
1229  %v = load <4 x i16>, <4 x i16>* %s
1230  %v2 = sext <4 x i16> %v to <4 x i32>
1231  ret <4 x i32> %v2
1232}
1233
1234; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset:
1235; NO-SIMD128-NOT: v128
1236; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
1237; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1238; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1239; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1240; SIMD128-NEXT: return $pop[[R]]{{$}}
1241define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1242  %q = ptrtoint <4 x i16>* %p to i32
1243  %r = add nsw i32 %q, 16
1244  %s = inttoptr i32 %r to <4 x i16>*
1245  %v = load <4 x i16>, <4 x i16>* %s
1246  %v2 = zext <4 x i16> %v to <4 x i32>
1247  ret <4 x i32> %v2
1248}
1249
1250; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
1251; NO-SIMD128-NOT: load16x4
1252; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
1253; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1254; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1255; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1256; SIMD128-NEXT: return $pop[[R]]{{$}}
1257define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) {
1258  %q = ptrtoint <4 x i16>* %p to i32
1259  %r = add nsw i32 %q, 16
1260  %s = inttoptr i32 %r to <4 x i16>*
1261  %v = load <4 x i16>, <4 x i16>* %s
1262  ret <4 x i16> %v
1263}
1264
1265; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
1266; NO-SIMD128-NOT: v128
1267; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1268; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1269; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1270; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1271; SIMD128-NEXT: return $pop[[R]]{{$}}
1272define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
1273  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
1274  %v = load <4 x i32>, <4 x i32>* %s
1275  ret <4 x i32> %v
1276}
1277
1278; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
1279; NO-SIMD128-NOT: v128
1280; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1281; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 4{{$}}
1282; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1283; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1284; SIMD128-NEXT: return $pop[[R]]{{$}}
1285define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
1286  %s = getelementptr i32, i32* %p, i32 1
1287  %e = load i32, i32* %s
1288  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1289  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1290  ret <4 x i32> %v2
1291}
1292
1293; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset:
1294; NO-SIMD128-NOT: v128
1295; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1296; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1297; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1298; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1299; SIMD128-NEXT: return $pop[[R]]{{$}}
1300define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1301  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1302  %v = load <4 x i16>, <4 x i16>* %s
1303  %v2 = sext <4 x i16> %v to <4 x i32>
1304  ret <4 x i32> %v2
1305}
1306
1307; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset:
1308; NO-SIMD128-NOT: v128
1309; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1310; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1311; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1312; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1313; SIMD128-NEXT: return $pop[[R]]{{$}}
1314define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1315  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1316  %v = load <4 x i16>, <4 x i16>* %s
1317  %v2 = zext <4 x i16> %v to <4 x i32>
1318  ret <4 x i32> %v2
1319}
1320
1321; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
1322; NO-SIMD128-NOT: load16x4
1323; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1324; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1325; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1326; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1327; SIMD128-NEXT: return $pop[[R]]{{$}}
1328define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) {
1329  %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1
1330  %v = load <4 x i16>, <4 x i16>* %s
1331  ret <4 x i16> %v
1332}
1333
1334; CHECK-LABEL: load_v4i32_from_numeric_address:
1335; NO-SIMD128-NOT: v128
1336; SIMD128-NEXT: .functype load_v4i32_from_numeric_address () -> (v128){{$}}
1337; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1338; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1339; SIMD128-NEXT: return $pop[[R]]{{$}}
1340define <4 x i32> @load_v4i32_from_numeric_address() {
1341  %s = inttoptr i32 32 to <4 x i32>*
1342  %v = load <4 x i32>, <4 x i32>* %s
1343  ret <4 x i32> %v
1344}
1345
1346; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
1347; NO-SIMD128-NOT: v128
1348; SIMD128-NEXT: .functype load_splat_v4i32_from_numeric_address () -> (v128){{$}}
1349; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1350; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1351; SIMD128-NEXT: return $pop[[R]]{{$}}
1352define <4 x i32> @load_splat_v4i32_from_numeric_address() {
1353  %s = inttoptr i32 32 to i32*
1354  %e = load i32, i32* %s
1355  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1356  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1357  ret <4 x i32> %v2
1358}
1359
1360; CHECK-LABEL: load_sext_v4i32_from_numeric_address:
1361; NO-SIMD128-NOT: v128
1362; SIMD128-NEXT: .functype load_sext_v4i32_from_numeric_address () -> (v128){{$}}
1363; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1364; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1365; SIMD128-NEXT: return $pop[[R]]{{$}}
1366define <4 x i32> @load_sext_v4i32_from_numeric_address() {
1367  %s = inttoptr i32 32 to <4 x i16>*
1368  %v = load <4 x i16>, <4 x i16>* %s
1369  %v2 = sext <4 x i16> %v to <4 x i32>
1370  ret <4 x i32> %v2
1371}
1372
1373; CHECK-LABEL: load_zext_v4i32_from_numeric_address:
1374; NO-SIMD128-NOT: v128
1375; SIMD128-NEXT: .functype load_zext_v4i32_from_numeric_address () -> (v128){{$}}
1376; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1377; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1378; SIMD128-NEXT: return $pop[[R]]{{$}}
1379define <4 x i32> @load_zext_v4i32_from_numeric_address() {
1380  %s = inttoptr i32 32 to <4 x i16>*
1381  %v = load <4 x i16>, <4 x i16>* %s
1382  %v2 = zext <4 x i16> %v to <4 x i32>
1383  ret <4 x i32> %v2
1384}
1385
1386; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
1387; NO-SIMD128-NOT: load16x4
1388; SIMD128-NEXT: .functype load_ext_v4i32_from_numeric_address () -> (v128){{$}}
1389; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1390; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1391; SIMD128-NEXT: return $pop[[R]]{{$}}
1392define <4 x i16> @load_ext_v4i32_from_numeric_address() {
1393  %s = inttoptr i32 32 to <4 x i16>*
1394  %v = load <4 x i16>, <4 x i16>* %s
1395  ret <4 x i16> %v
1396}
1397
1398; CHECK-LABEL: load_v4i32_from_global_address:
1399; NO-SIMD128-NOT: v128
1400; SIMD128-NEXT: .functype load_v4i32_from_global_address () -> (v128){{$}}
1401; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1402; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4i32($pop[[L0]]){{$}}
1403; SIMD128-NEXT: return $pop[[R]]{{$}}
1404@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
1405define <4 x i32> @load_v4i32_from_global_address() {
1406  %v = load <4 x i32>, <4 x i32>* @gv_v4i32
1407  ret <4 x i32> %v
1408}
1409
1410; CHECK-LABEL: load_splat_v4i32_from_global_address:
1411; NO-SIMD128-NOT: v128
1412; SIMD128-NEXT: .functype load_splat_v4i32_from_global_address () -> (v128){{$}}
1413; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1414; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, gv_i32($pop[[L0]]){{$}}
1415; SIMD128-NEXT: return $pop[[R]]{{$}}
1416@gv_i32 = global i32 42
1417define <4 x i32> @load_splat_v4i32_from_global_address() {
1418  %e = load i32, i32* @gv_i32
1419  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1420  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1421  ret <4 x i32> %v2
1422}
1423
1424; CHECK-LABEL: load_sext_v4i32_from_global_address:
1425; NO-SIMD128-NOT: v128
1426; SIMD128-NEXT: .functype load_sext_v4i32_from_global_address () -> (v128){{$}}
1427; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1428; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}}
1429; SIMD128-NEXT: return $pop[[R]]{{$}}
1430@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
1431define <4 x i32> @load_sext_v4i32_from_global_address() {
1432  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1433  %v2 = sext <4 x i16> %v to <4 x i32>
1434  ret <4 x i32> %v2
1435}
1436
1437; CHECK-LABEL: load_zext_v4i32_from_global_address:
1438; NO-SIMD128-NOT: v128
1439; SIMD128-NEXT: .functype load_zext_v4i32_from_global_address () -> (v128){{$}}
1440; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1441; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}}
1442; SIMD128-NEXT: return $pop[[R]]{{$}}
1443define <4 x i32> @load_zext_v4i32_from_global_address() {
1444  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1445  %v2 = zext <4 x i16> %v to <4 x i32>
1446  ret <4 x i32> %v2
1447}
1448
1449; CHECK-LABEL: load_ext_v4i32_from_global_address:
1450; NO-SIMD128-NOT: load16x4
1451; SIMD128-NEXT: .functype load_ext_v4i32_from_global_address () -> (v128){{$}}
1452; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1453; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}}
1454; SIMD128-NEXT: return $pop[[R]]{{$}}
1455define <4 x i16> @load_ext_v4i32_from_global_address() {
1456  %v = load <4 x i16>, <4 x i16>* @gv_v4i16
1457  ret <4 x i16> %v
1458}
1459
1460; CHECK-LABEL: store_v4i32:
1461; NO-SIMD128-NOT: v128
1462; SIMD128-NEXT: .functype store_v4i32 (v128, i32) -> (){{$}}
1463; SIMD128-NEXT: v128.store 0($1), $0{{$}}
1464define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
1465  store <4 x i32> %v , <4 x i32>* %p
1466  ret void
1467}
1468
1469; CHECK-LABEL: store_v4i32_with_folded_offset:
1470; NO-SIMD128-NOT: v128
1471; SIMD128-NEXT: .functype store_v4i32_with_folded_offset (v128, i32) -> (){{$}}
1472; SIMD128-NEXT: v128.store 16($1), $0{{$}}
1473define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
1474  %q = ptrtoint <4 x i32>* %p to i32
1475  %r = add nuw i32 %q, 16
1476  %s = inttoptr i32 %r to <4 x i32>*
1477  store <4 x i32> %v , <4 x i32>* %s
1478  ret void
1479}
1480
1481; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
1482; NO-SIMD128-NOT: v128
1483; SIMD128-NEXT: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> (){{$}}
1484; SIMD128-NEXT: v128.store 16($1), $0{{$}}
1485define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1486  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
1487  store <4 x i32> %v , <4 x i32>* %s
1488  ret void
1489}
1490
1491; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
1492; NO-SIMD128-NOT: v128
1493; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
1494; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1495; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1496; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1497define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
1498  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1499  store <4 x i32> %v , <4 x i32>* %s
1500  ret void
1501}
1502
1503; CHECK-LABEL: store_v4i32_with_unfolded_offset:
1504; NO-SIMD128-NOT: v128
1505; SIMD128-NEXT: .functype store_v4i32_with_unfolded_offset (v128, i32) -> (){{$}}
1506; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1507; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1508; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1509define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
1510  %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
1511  store <4 x i32> %v , <4 x i32>* %s
1512  ret void
1513}
1514
1515; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
1516; NO-SIMD128-NOT: v128
1517; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> (){{$}}
1518; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1519; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1520; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1521define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
1522  %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
1523  store <4 x i32> %v , <4 x i32>* %s
1524  ret void
1525}
1526
1527; CHECK-LABEL: store_v4i32_to_numeric_address:
1528; NO-SIMD128-NOT: v128
1529; SIMD128-NEXT: .functype store_v4i32_to_numeric_address (v128) -> (){{$}}
1530; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1531; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
1532define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
1533  %s = inttoptr i32 32 to <4 x i32>*
1534  store <4 x i32> %v , <4 x i32>* %s
1535  ret void
1536}
1537
1538; CHECK-LABEL: store_v4i32_to_global_address:
1539; NO-SIMD128-NOT: v128
1540; SIMD128-NEXT: .functype store_v4i32_to_global_address (v128) -> (){{$}}
1541; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
1542; SIMD128-NEXT: v128.store gv_v4i32($pop[[R]]), $0{{$}}
1543define void @store_v4i32_to_global_address(<4 x i32> %v) {
1544  store <4 x i32> %v , <4 x i32>* @gv_v4i32
1545  ret void
1546}
1547
1548; ==============================================================================
1549; 2 x i64
1550; ==============================================================================
1551; CHECK-LABEL: load_v2i64:
1552; NO-SIMD128-NOT: v128
1553; SIMD128-VM-NOT: v128
1554; SIMD128-NEXT: .functype load_v2i64 (i32) -> (v128){{$}}
1555; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
1556; SIMD128-NEXT: return $pop[[R]]{{$}}
1557define <2 x i64> @load_v2i64(<2 x i64>* %p) {
1558  %v = load <2 x i64>, <2 x i64>* %p
1559  ret <2 x i64> %v
1560}
1561
1562; CHECK-LABEL: load_splat_v2i64:
1563; NO-SIMD128-NOT: v128
1564; SIMD128-VM-NOT: v128
1565; SIMD128-NEXT: .functype load_splat_v2i64 (i32) -> (v128){{$}}
1566; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
1567; SIMD128-NEXT: return $pop[[R]]{{$}}
1568define <2 x i64> @load_splat_v2i64(i64* %p) {
1569  %e = load i64, i64* %p
1570  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1571  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1572  ret <2 x i64> %v2
1573}
1574
1575; CHECK-LABEL: load_sext_v2i64:
1576; NO-SIMD128-NOT: v128
1577; SIMD128-VM-NOT: v128
1578; SIMD128-NEXT: .functype load_sext_v2i64 (i32) -> (v128){{$}}
1579; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($0){{$}}
1580; SIMD128-NEXT: return $pop[[R]]{{$}}
1581define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) {
1582  %v = load <2 x i32>, <2 x i32>* %p
1583  %v2 = sext <2 x i32> %v to <2 x i64>
1584  ret <2 x i64> %v2
1585}
1586
1587; CHECK-LABEL: load_zext_v2i64:
1588; NO-SIMD128-NOT: v128
1589; SIMD128-VM-NOT: v128
1590; SIMD128-NEXT: .functype load_zext_v2i64 (i32) -> (v128){{$}}
1591; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($0){{$}}
1592; SIMD128-NEXT: return $pop[[R]]{{$}}
1593define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) {
1594  %v = load <2 x i32>, <2 x i32>* %p
1595  %v2 = zext <2 x i32> %v to <2 x i64>
1596  ret <2 x i64> %v2
1597}
1598
1599; CHECK-LABEL: load_ext_v2i64:
1600; NO-SIMD128-NOT: v128
1601; SIMD128-VM-NOT: load32x2
1602; SIMD128-NEXT: .functype load_ext_v2i64 (i32) -> (v128){{$}}
1603; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($0){{$}}
1604; SIMD128-NEXT: return $pop[[R]]{{$}}
1605define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) {
1606  %v = load <2 x i32>, <2 x i32>* %p
1607  ret <2 x i32> %v
1608}
1609
1610; CHECK-LABEL: load_v2i64_with_folded_offset:
1611; NO-SIMD128-NOT: v128
1612; SIMD128-VM-NOT: v128
1613; SIMD128-NEXT: .functype load_v2i64_with_folded_offset (i32) -> (v128){{$}}
1614; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
1615; SIMD128-NEXT: return $pop[[R]]{{$}}
1616define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
1617  %q = ptrtoint <2 x i64>* %p to i32
1618  %r = add nuw i32 %q, 16
1619  %s = inttoptr i32 %r to <2 x i64>*
1620  %v = load <2 x i64>, <2 x i64>* %s
1621  ret <2 x i64> %v
1622}
1623
1624; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
1625; NO-SIMD128-NOT: v128
1626; SIMD128-VM-NOT: v128
1627; SIMD128-NEXT: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128){{$}}
1628; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
1629; SIMD128-NEXT: return $pop[[R]]{{$}}
1630define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
1631  %q = ptrtoint i64* %p to i32
1632  %r = add nuw i32 %q, 16
1633  %s = inttoptr i32 %r to i64*
1634  %e = load i64, i64* %s
1635  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1636  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1637  ret <2 x i64> %v2
1638}
1639
1640; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
1641; NO-SIMD128-NOT: v128
1642; SIMD128-VM-NOT: v128
1643; SIMD128-NEXT: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128){{$}}
1644; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 16($0){{$}}
1645; SIMD128-NEXT: return $pop[[R]]{{$}}
1646define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) {
1647  %q = ptrtoint <2 x i32>* %p to i32
1648  %r = add nuw i32 %q, 16
1649  %s = inttoptr i32 %r to <2 x i32>*
1650  %v = load <2 x i32>, <2 x i32>* %s
1651  %v2 = sext <2 x i32> %v to <2 x i64>
1652  ret <2 x i64> %v2
1653}
1654
1655; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
1656; NO-SIMD128-NOT: v128
1657; SIMD128-VM-NOT: v128
1658; SIMD128-NEXT: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128){{$}}
1659; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 16($0){{$}}
1660; SIMD128-NEXT: return $pop[[R]]{{$}}
1661define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) {
1662  %q = ptrtoint <2 x i32>* %p to i32
1663  %r = add nuw i32 %q, 16
1664  %s = inttoptr i32 %r to <2 x i32>*
1665  %v = load <2 x i32>, <2 x i32>* %s
1666  %v2 = zext <2 x i32> %v to <2 x i64>
1667  ret <2 x i64> %v2
1668}
1669
1670; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
1671; NO-SIMD128-NOT: v128
1672; SIMD128-VM-NOT: load32x2
1673; SIMD128-NEXT: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128){{$}}
1674; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 16($0){{$}}
1675; SIMD128-NEXT: return $pop[[R]]{{$}}
1676define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) {
1677  %q = ptrtoint <2 x i32>* %p to i32
1678  %r = add nuw i32 %q, 16
1679  %s = inttoptr i32 %r to <2 x i32>*
1680  %v = load <2 x i32>, <2 x i32>* %s
1681  ret <2 x i32> %v
1682}
1683
1684; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
1685; NO-SIMD128-NOT: v128
1686; SIMD128-VM-NOT: v128
1687; SIMD128-NEXT: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
1688; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
1689; SIMD128-NEXT: return $pop[[R]]{{$}}
1690define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
1691  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
1692  %v = load <2 x i64>, <2 x i64>* %s
1693  ret <2 x i64> %v
1694}
1695
1696; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
1697; NO-SIMD128-NOT: v128
1698; SIMD128-VM-NOT: v128
1699; SIMD128-NEXT: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
1700; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 8($0){{$}}
1701; SIMD128-NEXT: return $pop[[R]]{{$}}
1702define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
1703  %s = getelementptr inbounds i64, i64* %p, i32 1
1704  %e = load i64, i64* %s
1705  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1706  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1707  ret <2 x i64> %v2
1708}
1709
1710; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
1711; NO-SIMD128-NOT: v128
1712; SIMD128-VM-NOT: v128
1713; SIMD128-NEXT: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
1714; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 8($0){{$}}
1715; SIMD128-NEXT: return $pop[[R]]{{$}}
1716define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
1717  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
1718  %v = load <2 x i32>, <2 x i32>* %s
1719  %v2 = sext <2 x i32> %v to <2 x i64>
1720  ret <2 x i64> %v2
1721}
1722
1723; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
1724; NO-SIMD128-NOT: v128
1725; SIMD128-VM-NOT: v128
1726; SIMD128-NEXT: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
1727; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 8($0){{$}}
1728; SIMD128-NEXT: return $pop[[R]]{{$}}
1729define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
1730  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
1731  %v = load <2 x i32>, <2 x i32>* %s
1732  %v2 = zext <2 x i32> %v to <2 x i64>
1733  ret <2 x i64> %v2
1734}
1735
1736; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
1737; NO-SIMD128-NOT: v128
1738; SIMD128-VM-NOT: load32x2
1739; SIMD128-NEXT: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
1740; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 8($0){{$}}
1741; SIMD128-NEXT: return $pop[[R]]{{$}}
1742define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) {
1743  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1
1744  %v = load <2 x i32>, <2 x i32>* %s
1745  ret <2 x i32> %v
1746}
1747
1748; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
1749; NO-SIMD128-NOT: v128
1750; SIMD128-VM-NOT: v128
1751; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1752; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1753; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1754; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1755; SIMD128-NEXT: return $pop[[R]]{{$}}
1756define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
1757  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
1758  %v = load <2 x i64>, <2 x i64>* %s
1759  ret <2 x i64> %v
1760}
1761
1762; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
1763; NO-SIMD128-NOT: v128
1764; SIMD128-VM-NOT: v128
1765; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1766; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1767; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1768; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1769; SIMD128-NEXT: return $pop[[R]]{{$}}
1770define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
1771  %s = getelementptr inbounds i64, i64* %p, i32 -1
1772  %e = load i64, i64* %s
1773  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1774  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1775  ret <2 x i64> %v2
1776}
1777
1778; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
1779; NO-SIMD128-NOT: v128
1780; SIMD128-VM-NOT: v128
1781; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1782; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1783; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1784; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1785; SIMD128-NEXT: return $pop[[R]]{{$}}
1786define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
1787  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
1788  %v = load <2 x i32>, <2 x i32>* %s
1789  %v2 = sext <2 x i32> %v to <2 x i64>
1790  ret <2 x i64> %v2
1791}
1792
1793; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
1794; NO-SIMD128-NOT: v128
1795; SIMD128-VM-NOT: v128
1796; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1797; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1798; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1799; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1800; SIMD128-NEXT: return $pop[[R]]{{$}}
1801define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
1802  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
1803  %v = load <2 x i32>, <2 x i32>* %s
1804  %v2 = zext <2 x i32> %v to <2 x i64>
1805  ret <2 x i64> %v2
1806}
1807
1808; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
1809; NO-SIMD128-NOT: v128
1810; SIMD128-VM-NOT: load32x2
1811; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1812; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1813; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1814; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1815; SIMD128-NEXT: return $pop[[R]]{{$}}
1816define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) {
1817  %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1
1818  %v = load <2 x i32>, <2 x i32>* %s
1819  ret <2 x i32> %v
1820}
1821
1822; CHECK-LABEL: load_v2i64_with_unfolded_offset:
1823; NO-SIMD128-NOT: v128
1824; SIMD128-VM-NOT: v128
1825; SIMD128-NEXT: .functype load_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1826; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1827; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1828; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1829; SIMD128-NEXT: return $pop[[R]]{{$}}
1830define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
1831  %q = ptrtoint <2 x i64>* %p to i32
1832  %r = add nsw i32 %q, 16
1833  %s = inttoptr i32 %r to <2 x i64>*
1834  %v = load <2 x i64>, <2 x i64>* %s
1835  ret <2 x i64> %v
1836}
1837
1838; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
1839; NO-SIMD128-NOT: v128
1840; SIMD128-VM-NOT: v128
1841; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1842; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1843; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1844; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1845; SIMD128-NEXT: return $pop[[R]]{{$}}
1846define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
1847  %q = ptrtoint i64* %p to i32
1848  %r = add nsw i32 %q, 16
1849  %s = inttoptr i32 %r to i64*
1850  %e = load i64, i64* %s
1851  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1852  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1853  ret <2 x i64> %v2
1854}
1855
1856; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
1857; NO-SIMD128-NOT: v128
1858; SIMD128-VM-NOT: v128
1859; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1860; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1861; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1862; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1863; SIMD128-NEXT: return $pop[[R]]{{$}}
1864define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
1865  %q = ptrtoint <2 x i32>* %p to i32
1866  %r = add nsw i32 %q, 16
1867  %s = inttoptr i32 %r to <2 x i32>*
1868  %v = load <2 x i32>, <2 x i32>* %s
1869  %v2 = sext <2 x i32> %v to <2 x i64>
1870  ret <2 x i64> %v2
1871}
1872
1873; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
1874; NO-SIMD128-NOT: v128
1875; SIMD128-VM-NOT: v128
1876; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1877; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1878; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1879; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1880; SIMD128-NEXT: return $pop[[R]]{{$}}
1881define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
1882  %q = ptrtoint <2 x i32>* %p to i32
1883  %r = add nsw i32 %q, 16
1884  %s = inttoptr i32 %r to <2 x i32>*
1885  %v = load <2 x i32>, <2 x i32>* %s
1886  %v2 = zext <2 x i32> %v to <2 x i64>
1887  ret <2 x i64> %v2
1888}
1889
1890; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
1891; NO-SIMD128-NOT: v128
1892; SIMD128-VM-NOT: load32x2
1893; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1894; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1895; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1896; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1897; SIMD128-NEXT: return $pop[[R]]{{$}}
1898define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) {
1899  %q = ptrtoint <2 x i32>* %p to i32
1900  %r = add nsw i32 %q, 16
1901  %s = inttoptr i32 %r to <2 x i32>*
1902  %v = load <2 x i32>, <2 x i32>* %s
1903  ret <2 x i32> %v
1904}
1905
1906; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
1907; NO-SIMD128-NOT: v128
1908; SIMD128-VM-NOT: v128
1909; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1910; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1911; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1912; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1913; SIMD128-NEXT: return $pop[[R]]{{$}}
1914define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
1915  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
1916  %v = load <2 x i64>, <2 x i64>* %s
1917  ret <2 x i64> %v
1918}
1919
1920; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
1921; NO-SIMD128-NOT: v128
1922; SIMD128-VM-NOT: v128
1923; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1924; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1925; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1926; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1927; SIMD128-NEXT: return $pop[[R]]{{$}}
1928define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
1929  %s = getelementptr i64, i64* %p, i32 1
1930  %e = load i64, i64* %s
1931  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1932  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
1933  ret <2 x i64> %v2
1934}
1935
1936; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
1937; NO-SIMD128-NOT: v128
1938; SIMD128-VM-NOT: v128
1939; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1940; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1941; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1942; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1943; SIMD128-NEXT: return $pop[[R]]{{$}}
1944define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
1945  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
1946  %v = load <2 x i32>, <2 x i32>* %s
1947  %v2 = sext <2 x i32> %v to <2 x i64>
1948  ret <2 x i64> %v2
1949}
1950
1951; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
1952; NO-SIMD128-NOT: v128
1953; SIMD128-VM-NOT: v128
1954; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1955; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1956; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1957; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1958; SIMD128-NEXT: return $pop[[R]]{{$}}
1959define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
1960  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
1961  %v = load <2 x i32>, <2 x i32>* %s
1962  %v2 = zext <2 x i32> %v to <2 x i64>
1963  ret <2 x i64> %v2
1964}
1965
1966; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
1967; NO-SIMD128-NOT: v128
1968; SIMD128-VM-NOT: load32x2
1969; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1970; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1971; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1972; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1973; SIMD128-NEXT: return $pop[[R]]{{$}}
1974define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) {
1975  %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1
1976  %v = load <2 x i32>, <2 x i32>* %s
1977  ret <2 x i32> %v
1978}
1979
1980; CHECK-LABEL: load_v2i64_from_numeric_address:
1981; NO-SIMD128-NOT: v128
1982; SIMD128-VM-NOT: v128
1983; SIMD128-NEXT: .functype load_v2i64_from_numeric_address () -> (v128){{$}}
1984; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1985; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1986; SIMD128-NEXT: return $pop[[R]]{{$}}
1987define <2 x i64> @load_v2i64_from_numeric_address() {
1988  %s = inttoptr i32 32 to <2 x i64>*
1989  %v = load <2 x i64>, <2 x i64>* %s
1990  ret <2 x i64> %v
1991}
1992
1993; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
1994; NO-SIMD128-NOT: v128
1995; SIMD128-VM-NOT: v128
1996; SIMD128-NEXT: .functype load_splat_v2i64_from_numeric_address () -> (v128){{$}}
1997; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1998; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1999; SIMD128-NEXT: return $pop[[R]]{{$}}
2000define <2 x i64> @load_splat_v2i64_from_numeric_address() {
2001  %s = inttoptr i32 32 to i64*
2002  %e = load i64, i64* %s
2003  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2004  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2005  ret <2 x i64> %v2
2006}
2007
2008; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
2009; NO-SIMD128-NOT: v128
2010; SIMD128-VM-NOT: v128
2011; SIMD128-NEXT: .functype load_sext_v2i64_from_numeric_address () -> (v128){{$}}
2012; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2013; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2014; SIMD128-NEXT: return $pop[[R]]{{$}}
2015define <2 x i64> @load_sext_v2i64_from_numeric_address() {
2016  %s = inttoptr i32 32 to <2 x i32>*
2017  %v = load <2 x i32>, <2 x i32>* %s
2018  %v2 = sext <2 x i32> %v to <2 x i64>
2019  ret <2 x i64> %v2
2020}
2021
2022; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
2023; NO-SIMD128-NOT: v128
2024; SIMD128-VM-NOT: v128
2025; SIMD128-NEXT: .functype load_zext_v2i64_from_numeric_address () -> (v128){{$}}
2026; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2027; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2028; SIMD128-NEXT: return $pop[[R]]{{$}}
2029define <2 x i64> @load_zext_v2i64_from_numeric_address() {
2030  %s = inttoptr i32 32 to <2 x i32>*
2031  %v = load <2 x i32>, <2 x i32>* %s
2032  %v2 = zext <2 x i32> %v to <2 x i64>
2033  ret <2 x i64> %v2
2034}
2035
2036; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
2037; NO-SIMD128-NOT: v128
2038; SIMD128-VM-NOT: load32x2
2039; SIMD128-NEXT: .functype load_ext_v2i64_from_numeric_address () -> (v128){{$}}
2040; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2041; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2042; SIMD128-NEXT: return $pop[[R]]{{$}}
2043define <2 x i32> @load_ext_v2i64_from_numeric_address() {
2044  %s = inttoptr i32 32 to <2 x i32>*
2045  %v = load <2 x i32>, <2 x i32>* %s
2046  ret <2 x i32> %v
2047}
2048
2049; CHECK-LABEL: load_v2i64_from_global_address:
2050; NO-SIMD128-NOT: v128
2051; SIMD128-VM-NOT: v128
2052; SIMD128-NEXT: .functype load_v2i64_from_global_address () -> (v128){{$}}
2053; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2054; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2i64($pop[[L0]]){{$}}
2055; SIMD128-NEXT: return $pop[[R]]{{$}}
2056@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
2057define <2 x i64> @load_v2i64_from_global_address() {
2058  %v = load <2 x i64>, <2 x i64>* @gv_v2i64
2059  ret <2 x i64> %v
2060}
2061
2062; CHECK-LABEL: load_splat_v2i64_from_global_address:
2063; NO-SIMD128-NOT: v128
2064; SIMD128-VM-NOT: v128
2065; SIMD128-NEXT: .functype load_splat_v2i64_from_global_address () -> (v128){{$}}
2066; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2067; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, gv_i64($pop[[L0]]){{$}}
2068; SIMD128-NEXT: return $pop[[R]]{{$}}
2069@gv_i64 = global i64 42
2070define <2 x i64> @load_splat_v2i64_from_global_address() {
2071  %e = load i64, i64* @gv_i64
2072  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2073  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2074  ret <2 x i64> %v2
2075}
2076
2077; CHECK-LABEL: load_sext_v2i64_from_global_address:
2078; NO-SIMD128-NOT: v128
2079; SIMD128-VM-NOT: v128
2080; SIMD128-NEXT: .functype load_sext_v2i64_from_global_address () -> (v128){{$}}
2081; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2082; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}}
2083; SIMD128-NEXT: return $pop[[R]]{{$}}
2084@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
2085define <2 x i64> @load_sext_v2i64_from_global_address() {
2086  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2087  %v2 = sext <2 x i32> %v to <2 x i64>
2088  ret <2 x i64> %v2
2089}
2090
2091; CHECK-LABEL: load_zext_v2i64_from_global_address:
2092; NO-SIMD128-NOT: v128
2093; SIMD128-VM-NOT: v128
2094; SIMD128-NEXT: .functype load_zext_v2i64_from_global_address () -> (v128){{$}}
2095; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2096; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}}
2097; SIMD128-NEXT: return $pop[[R]]{{$}}
2098define <2 x i64> @load_zext_v2i64_from_global_address() {
2099  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2100  %v2 = zext <2 x i32> %v to <2 x i64>
2101  ret <2 x i64> %v2
2102}
2103
2104; CHECK-LABEL: load_ext_v2i64_from_global_address:
2105; NO-SIMD128-NOT: v128
2106; SIMD128-VM-NOT: load32x2
2107; SIMD128-NEXT: .functype load_ext_v2i64_from_global_address () -> (v128){{$}}
2108; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2109; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}}
2110; SIMD128-NEXT: return $pop[[R]]{{$}}
2111define <2 x i32> @load_ext_v2i64_from_global_address() {
2112  %v = load <2 x i32>, <2 x i32>* @gv_v2i32
2113  ret <2 x i32> %v
2114}
2115
2116; CHECK-LABEL: store_v2i64:
2117; NO-SIMD128-NOT: v128
2118; SIMD128-VM-NOT: v128
2119; SIMD128-NEXT: .functype store_v2i64 (v128, i32) -> (){{$}}
2120; SIMD128-NEXT: v128.store 0($1), $0{{$}}
2121define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
2122  store <2 x i64> %v , <2 x i64>* %p
2123  ret void
2124}
2125
2126; CHECK-LABEL: store_v2i64_with_folded_offset:
2127; NO-SIMD128-NOT: v128
2128; SIMD128-VM-NOT: v128
2129; SIMD128-NEXT: .functype store_v2i64_with_folded_offset (v128, i32) -> (){{$}}
2130; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2131define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
2132  %q = ptrtoint <2 x i64>* %p to i32
2133  %r = add nuw i32 %q, 16
2134  %s = inttoptr i32 %r to <2 x i64>*
2135  store <2 x i64> %v , <2 x i64>* %s
2136  ret void
2137}
2138
2139; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
2140; NO-SIMD128-NOT: v128
2141; SIMD128-VM-NOT: v128
2142; SIMD128-NEXT: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> (){{$}}
2143; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2144define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
2145  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
2146  store <2 x i64> %v , <2 x i64>* %s
2147  ret void
2148}
2149
2150; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
2151; NO-SIMD128-NOT: v128
2152; SIMD128-VM-NOT: v128
2153; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
2154; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2155; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2156; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2157define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
2158  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
2159  store <2 x i64> %v , <2 x i64>* %s
2160  ret void
2161}
2162
2163; CHECK-LABEL: store_v2i64_with_unfolded_offset:
2164; NO-SIMD128-NOT: v128
2165; SIMD128-VM-NOT: v128
2166; SIMD128-NEXT: .functype store_v2i64_with_unfolded_offset (v128, i32) -> (){{$}}
2167; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2168; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2169; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2170define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
2171  %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
2172  store <2 x i64> %v , <2 x i64>* %s
2173  ret void
2174}
2175
2176; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
2177; NO-SIMD128-NOT: v128
2178; SIMD128-VM-NOT: v128
2179; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> (){{$}}
2180; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2181; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2182; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2183define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
2184  %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
2185  store <2 x i64> %v , <2 x i64>* %s
2186  ret void
2187}
2188
2189; CHECK-LABEL: store_v2i64_to_numeric_address:
2190; NO-SIMD128-NOT: v128
2191; SIMD128-VM-NOT: v128
2192; SIMD128-NEXT: .functype store_v2i64_to_numeric_address (v128) -> (){{$}}
2193; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2194; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
2195define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
2196  %s = inttoptr i32 32 to <2 x i64>*
2197  store <2 x i64> %v , <2 x i64>* %s
2198  ret void
2199}
2200
2201; CHECK-LABEL: store_v2i64_to_global_address:
2202; NO-SIMD128-NOT: v128
2203; SIMD128-VM-NOT: v128
2204; SIMD128-NEXT: .functype store_v2i64_to_global_address (v128) -> (){{$}}
2205; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
2206; SIMD128-NEXT: v128.store gv_v2i64($pop[[R]]), $0{{$}}
2207define void @store_v2i64_to_global_address(<2 x i64> %v) {
2208  store <2 x i64> %v , <2 x i64>* @gv_v2i64
2209  ret void
2210}
2211
2212; ==============================================================================
2213; 4 x float
2214; ==============================================================================
2215; CHECK-LABEL: load_v4f32:
2216; NO-SIMD128-NOT: v128
2217; SIMD128-NEXT: .functype load_v4f32 (i32) -> (v128){{$}}
2218; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
2219; SIMD128-NEXT: return $pop[[R]]{{$}}
2220define <4 x float> @load_v4f32(<4 x float>* %p) {
2221  %v = load <4 x float>, <4 x float>* %p
2222  ret <4 x float> %v
2223}
2224
2225; CHECK-LABEL: load_splat_v4f32:
2226; NO-SIMD128-NOT: v128
2227; SIMD128-NEXT: .functype load_splat_v4f32 (i32) -> (v128){{$}}
2228; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
2229; SIMD128-NEXT: return $pop[[R]]{{$}}
2230define <4 x float> @load_splat_v4f32(float* %p) {
2231  %e = load float, float* %p
2232  %v1 = insertelement <4 x float> undef, float %e, i32 0
2233  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2234  ret <4 x float> %v2
2235}
2236
2237; CHECK-LABEL: load_v4f32_with_folded_offset:
2238; NO-SIMD128-NOT: v128
2239; SIMD128-NEXT: .functype load_v4f32_with_folded_offset (i32) -> (v128){{$}}
2240; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
2241; SIMD128-NEXT: return $pop[[R]]{{$}}
2242define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
2243  %q = ptrtoint <4 x float>* %p to i32
2244  %r = add nuw i32 %q, 16
2245  %s = inttoptr i32 %r to <4 x float>*
2246  %v = load <4 x float>, <4 x float>* %s
2247  ret <4 x float> %v
2248}
2249
2250; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
2251; NO-SIMD128-NOT: v128
2252; SIMD128-NEXT: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128){{$}}
2253; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
2254; SIMD128-NEXT: return $pop[[R]]{{$}}
2255define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
2256  %q = ptrtoint float* %p to i32
2257  %r = add nuw i32 %q, 16
2258  %s = inttoptr i32 %r to float*
2259  %e = load float, float* %s
2260  %v1 = insertelement <4 x float> undef, float %e, i32 0
2261  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2262  ret <4 x float> %v2
2263}
2264
2265; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
2266; NO-SIMD128-NOT: v128
2267; SIMD128-NEXT: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128){{$}}
2268; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
2269; SIMD128-NEXT: return $pop[[R]]{{$}}
2270define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
2271  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
2272  %v = load <4 x float>, <4 x float>* %s
2273  ret <4 x float> %v
2274}
2275
2276; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
2277; NO-SIMD128-NOT: v128
2278; SIMD128-NEXT: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128){{$}}
2279; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 4($0){{$}}
2280; SIMD128-NEXT: return $pop[[R]]{{$}}
2281define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
2282  %s = getelementptr inbounds float, float* %p, i32 1
2283  %e = load float, float* %s
2284  %v1 = insertelement <4 x float> undef, float %e, i32 0
2285  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2286  ret <4 x float> %v2
2287}
2288
2289; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
2290; NO-SIMD128-NOT: v128
2291; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
2292; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2293; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2294; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2295; SIMD128-NEXT: return $pop[[R]]{{$}}
2296define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
2297  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
2298  %v = load <4 x float>, <4 x float>* %s
2299  ret <4 x float> %v
2300}
2301
2302; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
2303; NO-SIMD128-NOT: v128
2304; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
2305; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -4{{$}}
2306; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2307; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2308; SIMD128-NEXT: return $pop[[R]]{{$}}
2309define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
2310  %s = getelementptr inbounds float, float* %p, i32 -1
2311  %e = load float, float* %s
2312  %v1 = insertelement <4 x float> undef, float %e, i32 0
2313  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2314  ret <4 x float> %v2
2315}
2316
2317; CHECK-LABEL: load_v4f32_with_unfolded_offset:
2318; NO-SIMD128-NOT: v128
2319; SIMD128-NEXT: .functype load_v4f32_with_unfolded_offset (i32) -> (v128){{$}}
2320; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2321; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2322; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2323; SIMD128-NEXT: return $pop[[R]]{{$}}
2324define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
2325  %q = ptrtoint <4 x float>* %p to i32
2326  %r = add nsw i32 %q, 16
2327  %s = inttoptr i32 %r to <4 x float>*
2328  %v = load <4 x float>, <4 x float>* %s
2329  ret <4 x float> %v
2330}
2331
2332; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
2333; NO-SIMD128-NOT: v128
2334; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128){{$}}
2335; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2336; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2337; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2338; SIMD128-NEXT: return $pop[[R]]{{$}}
2339define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
2340  %q = ptrtoint float* %p to i32
2341  %r = add nsw i32 %q, 16
2342  %s = inttoptr i32 %r to float*
2343  %e = load float, float* %s
2344  %v1 = insertelement <4 x float> undef, float %e, i32 0
2345  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2346  ret <4 x float> %v2
2347}
2348
2349; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
2350; NO-SIMD128-NOT: v128
2351; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}}
2352; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2353; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2354; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2355; SIMD128-NEXT: return $pop[[R]]{{$}}
2356define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
2357  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
2358  %v = load <4 x float>, <4 x float>* %s
2359  ret <4 x float> %v
2360}
2361
2362; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
2363; NO-SIMD128-NOT: v128
2364; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}}
2365; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 4{{$}}
2366; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2367; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2368; SIMD128-NEXT: return $pop[[R]]{{$}}
2369define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
2370  %s = getelementptr float, float* %p, i32 1
2371  %e = load float, float* %s
2372  %v1 = insertelement <4 x float> undef, float %e, i32 0
2373  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2374  ret <4 x float> %v2
2375}
2376
2377; CHECK-LABEL: load_v4f32_from_numeric_address:
2378; NO-SIMD128-NOT: v128
2379; SIMD128-NEXT: .functype load_v4f32_from_numeric_address () -> (v128){{$}}
2380; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2381; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2382; SIMD128-NEXT: return $pop[[R]]{{$}}
2383define <4 x float> @load_v4f32_from_numeric_address() {
2384  %s = inttoptr i32 32 to <4 x float>*
2385  %v = load <4 x float>, <4 x float>* %s
2386  ret <4 x float> %v
2387}
2388
2389; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
2390; NO-SIMD128-NOT: v128
2391; SIMD128-NEXT: .functype load_splat_v4f32_from_numeric_address () -> (v128){{$}}
2392; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2393; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2394; SIMD128-NEXT: return $pop[[R]]{{$}}
2395define <4 x float> @load_splat_v4f32_from_numeric_address() {
2396  %s = inttoptr i32 32 to float*
2397  %e = load float, float* %s
2398  %v1 = insertelement <4 x float> undef, float %e, i32 0
2399  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2400  ret <4 x float> %v2
2401}
2402
2403; CHECK-LABEL: load_v4f32_from_global_address:
2404; NO-SIMD128-NOT: v128
2405; SIMD128-NEXT: .functype load_v4f32_from_global_address () -> (v128){{$}}
2406; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2407; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4f32($pop[[L0]]){{$}}
2408; SIMD128-NEXT: return $pop[[R]]{{$}}
2409@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
2410define <4 x float> @load_v4f32_from_global_address() {
2411  %v = load <4 x float>, <4 x float>* @gv_v4f32
2412  ret <4 x float> %v
2413}
2414
2415; CHECK-LABEL: load_splat_v4f32_from_global_address:
2416; NO-SIMD128-NOT: v128
2417; SIMD128-NEXT: .functype load_splat_v4f32_from_global_address () -> (v128){{$}}
2418; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2419; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, gv_f32($pop[[L0]]){{$}}
2420; SIMD128-NEXT: return $pop[[R]]{{$}}
2421@gv_f32 = global float 42.
2422define <4 x float> @load_splat_v4f32_from_global_address() {
2423  %e = load float, float* @gv_f32
2424  %v1 = insertelement <4 x float> undef, float %e, i32 0
2425  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
2426  ret <4 x float> %v2
2427}
2428
2429; CHECK-LABEL: store_v4f32:
2430; NO-SIMD128-NOT: v128
2431; SIMD128-NEXT: .functype store_v4f32 (v128, i32) -> (){{$}}
2432; SIMD128-NEXT: v128.store 0($1), $0{{$}}
2433define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
2434  store <4 x float> %v , <4 x float>* %p
2435  ret void
2436}
2437
2438; CHECK-LABEL: store_v4f32_with_folded_offset:
2439; NO-SIMD128-NOT: v128
2440; SIMD128-NEXT: .functype store_v4f32_with_folded_offset (v128, i32) -> (){{$}}
2441; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2442define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
2443  %q = ptrtoint <4 x float>* %p to i32
2444  %r = add nuw i32 %q, 16
2445  %s = inttoptr i32 %r to <4 x float>*
2446  store <4 x float> %v , <4 x float>* %s
2447  ret void
2448}
2449
2450; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
2451; NO-SIMD128-NOT: v128
2452; SIMD128-NEXT: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> (){{$}}
2453; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2454define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
2455  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
2456  store <4 x float> %v , <4 x float>* %s
2457  ret void
2458}
2459
2460; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
2461; NO-SIMD128-NOT: v128
2462; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
2463; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2464; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2465; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2466define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
2467  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
2468  store <4 x float> %v , <4 x float>* %s
2469  ret void
2470}
2471
2472; CHECK-LABEL: store_v4f32_with_unfolded_offset:
2473; NO-SIMD128-NOT: v128
2474; SIMD128-NEXT: .functype store_v4f32_with_unfolded_offset (v128, i32) -> (){{$}}
2475; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2476; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2477; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2478define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
2479  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
2480  store <4 x float> %v , <4 x float>* %s
2481  ret void
2482}
2483
2484; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
2485; NO-SIMD128-NOT: v128
2486; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> (){{$}}
2487; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2488; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2489; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2490define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
2491  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
2492  store <4 x float> %v , <4 x float>* %s
2493  ret void
2494}
2495
2496; CHECK-LABEL: store_v4f32_to_numeric_address:
2497; NO-SIMD128-NOT: v128
2498; SIMD128-NEXT: .functype store_v4f32_to_numeric_address (v128) -> (){{$}}
2499; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2500; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
2501define void @store_v4f32_to_numeric_address(<4 x float> %v) {
2502  %s = inttoptr i32 32 to <4 x float>*
2503  store <4 x float> %v , <4 x float>* %s
2504  ret void
2505}
2506
2507; CHECK-LABEL: store_v4f32_to_global_address:
2508; NO-SIMD128-NOT: v128
2509; SIMD128-NEXT: .functype store_v4f32_to_global_address (v128) -> (){{$}}
2510; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
2511; SIMD128-NEXT: v128.store gv_v4f32($pop[[R]]), $0{{$}}
2512define void @store_v4f32_to_global_address(<4 x float> %v) {
2513  store <4 x float> %v , <4 x float>* @gv_v4f32
2514  ret void
2515}
2516
2517; ==============================================================================
2518; 2 x double
2519; ==============================================================================
2520; CHECK-LABEL: load_v2f64:
2521; NO-SIMD128-NOT: v128
2522; SIMD128-VM-NOT: v128
2523; SIMD128-NEXT: .functype load_v2f64 (i32) -> (v128){{$}}
2524; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
2525; SIMD128-NEXT: return $pop[[R]]{{$}}
2526define <2 x double> @load_v2f64(<2 x double>* %p) {
2527  %v = load <2 x double>, <2 x double>* %p
2528  ret <2 x double> %v
2529}
2530
2531; CHECK-LABEL: load_splat_v2f64:
2532; NO-SIMD128-NOT: v128
2533; SIMD128-VM-NOT: v128
2534; SIMD128-NEXT: .functype load_splat_v2f64 (i32) -> (v128){{$}}
2535; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
2536; SIMD128-NEXT: return $pop[[R]]{{$}}
2537define <2 x double> @load_splat_v2f64(double* %p) {
2538  %e = load double, double* %p
2539  %v1 = insertelement <2 x double> undef, double %e, i32 0
2540  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2541  ret <2 x double> %v2
2542}
2543
2544; CHECK-LABEL: load_v2f64_with_folded_offset:
2545; NO-SIMD128-NOT: v128
2546; SIMD128-VM-NOT: v128
2547; SIMD128-NEXT: .functype load_v2f64_with_folded_offset (i32) -> (v128){{$}}
2548; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
2549; SIMD128-NEXT: return $pop[[R]]{{$}}
2550define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
2551  %q = ptrtoint <2 x double>* %p to i32
2552  %r = add nuw i32 %q, 16
2553  %s = inttoptr i32 %r to <2 x double>*
2554  %v = load <2 x double>, <2 x double>* %s
2555  ret <2 x double> %v
2556}
2557
2558; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
2559; NO-SIMD128-NOT: v128
2560; SIMD128-VM-NOT: v128
2561; SIMD128-NEXT: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128){{$}}
2562; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
2563; SIMD128-NEXT: return $pop[[R]]{{$}}
2564define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
2565  %q = ptrtoint double* %p to i32
2566  %r = add nuw i32 %q, 16
2567  %s = inttoptr i32 %r to double*
2568  %e = load double, double* %s
2569  %v1 = insertelement <2 x double> undef, double %e, i32 0
2570  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2571  ret <2 x double> %v2
2572}
2573
2574; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
2575; NO-SIMD128-NOT: v128
2576; SIMD128-VM-NOT: v128
2577; SIMD128-NEXT: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128){{$}}
2578; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
2579; SIMD128-NEXT: return $pop[[R]]{{$}}
2580define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
2581  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
2582  %v = load <2 x double>, <2 x double>* %s
2583  ret <2 x double> %v
2584}
2585
2586; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
2587; NO-SIMD128-NOT: v128
2588; SIMD128-VM-NOT: v128
2589; SIMD128-NEXT: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128){{$}}
2590; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 8($0){{$}}
2591; SIMD128-NEXT: return $pop[[R]]{{$}}
2592define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
2593  %s = getelementptr inbounds double, double* %p, i32 1
2594  %e = load double, double* %s
2595  %v1 = insertelement <2 x double> undef, double %e, i32 0
2596  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2597  ret <2 x double> %v2
2598}
2599
2600; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
2601; NO-SIMD128-NOT: v128
2602; SIMD128-VM-NOT: v128
2603; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
2604; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2605; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2606; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2607; SIMD128-NEXT: return $pop[[R]]{{$}}
2608define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
2609  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
2610  %v = load <2 x double>, <2 x double>* %s
2611  ret <2 x double> %v
2612}
2613
2614; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
2615; NO-SIMD128-NOT: v128
2616; SIMD128-VM-NOT: v128
2617; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
2618; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
2619; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2620; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2621; SIMD128-NEXT: return $pop[[R]]{{$}}
2622define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
2623  %s = getelementptr inbounds double, double* %p, i32 -1
2624  %e = load double, double* %s
2625  %v1 = insertelement <2 x double> undef, double %e, i32 0
2626  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2627  ret <2 x double> %v2
2628}
2629
2630; CHECK-LABEL: load_v2f64_with_unfolded_offset:
2631; NO-SIMD128-NOT: v128
2632; SIMD128-VM-NOT: v128
2633; SIMD128-NEXT: .functype load_v2f64_with_unfolded_offset (i32) -> (v128){{$}}
2634; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2635; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2636; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2637; SIMD128-NEXT: return $pop[[R]]{{$}}
2638define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
2639  %q = ptrtoint <2 x double>* %p to i32
2640  %r = add nsw i32 %q, 16
2641  %s = inttoptr i32 %r to <2 x double>*
2642  %v = load <2 x double>, <2 x double>* %s
2643  ret <2 x double> %v
2644}
2645
2646; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
2647; NO-SIMD128-NOT: v128
2648; SIMD128-VM-NOT: v128
2649; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128){{$}}
2650; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2651; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2652; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2653; SIMD128-NEXT: return $pop[[R]]{{$}}
2654define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
2655  %q = ptrtoint double* %p to i32
2656  %r = add nsw i32 %q, 16
2657  %s = inttoptr i32 %r to double*
2658  %e = load double, double* %s
2659  %v1 = insertelement <2 x double> undef, double %e, i32 0
2660  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2661  ret <2 x double> %v2
2662}
2663
2664; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
2665; NO-SIMD128-NOT: v128
2666; SIMD128-VM-NOT: v128
2667; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}}
2668; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2669; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2670; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2671; SIMD128-NEXT: return $pop[[R]]{{$}}
2672define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
2673  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
2674  %v = load <2 x double>, <2 x double>* %s
2675  ret <2 x double> %v
2676}
2677
2678; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
2679; NO-SIMD128-NOT: v128
2680; SIMD128-VM-NOT: v128
2681; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}}
2682; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
2683; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
2684; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
2685; SIMD128-NEXT: return $pop[[R]]{{$}}
2686define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
2687  %s = getelementptr double, double* %p, i32 1
2688  %e = load double, double* %s
2689  %v1 = insertelement <2 x double> undef, double %e, i32 0
2690  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2691  ret <2 x double> %v2
2692}
2693
2694; CHECK-LABEL: load_v2f64_from_numeric_address:
2695; NO-SIMD128-NOT: v128
2696; SIMD128-VM-NOT: v128
2697; SIMD128-NEXT: .functype load_v2f64_from_numeric_address () -> (v128){{$}}
2698; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2699; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2700; SIMD128-NEXT: return $pop[[R]]{{$}}
2701define <2 x double> @load_v2f64_from_numeric_address() {
2702  %s = inttoptr i32 32 to <2 x double>*
2703  %v = load <2 x double>, <2 x double>* %s
2704  ret <2 x double> %v
2705}
2706
2707; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
2708; NO-SIMD128-NOT: v128
2709; SIMD128-VM-NOT: v128
2710; SIMD128-NEXT: .functype load_splat_v2f64_from_numeric_address () -> (v128){{$}}
2711; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2712; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
2713; SIMD128-NEXT: return $pop[[R]]{{$}}
2714define <2 x double> @load_splat_v2f64_from_numeric_address() {
2715  %s = inttoptr i32 32 to double*
2716  %e = load double, double* %s
2717  %v1 = insertelement <2 x double> undef, double %e, i32 0
2718  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2719  ret <2 x double> %v2
2720}
2721
2722; CHECK-LABEL: load_v2f64_from_global_address:
2723; NO-SIMD128-NOT: v128
2724; SIMD128-VM-NOT: v128
2725; SIMD128-NEXT: .functype load_v2f64_from_global_address () -> (v128){{$}}
2726; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2727; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2f64($pop[[L0]]){{$}}
2728; SIMD128-NEXT: return $pop[[R]]{{$}}
2729@gv_v2f64 = global <2 x double> <double 42., double 42.>
2730define <2 x double> @load_v2f64_from_global_address() {
2731  %v = load <2 x double>, <2 x double>* @gv_v2f64
2732  ret <2 x double> %v
2733}
2734
2735; CHECK-LABEL: load_splat_v2f64_from_global_address:
2736; NO-SIMD128-NOT: v128
2737; SIMD128-VM-NOT: v128
2738; SIMD128-NEXT: .functype load_splat_v2f64_from_global_address () -> (v128){{$}}
2739; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2740; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, gv_f64($pop[[L0]]){{$}}
2741; SIMD128-NEXT: return $pop[[R]]{{$}}
2742@gv_f64 = global double 42.
2743define <2 x double> @load_splat_v2f64_from_global_address() {
2744  %e = load double, double* @gv_f64
2745  %v1 = insertelement <2 x double> undef, double %e, i32 0
2746  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
2747  ret <2 x double> %v2
2748}
2749
2750; CHECK-LABEL: store_v2f64:
2751; NO-SIMD128-NOT: v128
2752; SIMD128-VM-NOT: v128
2753; SIMD128-NEXT: .functype store_v2f64 (v128, i32) -> (){{$}}
2754; SIMD128-NEXT: v128.store 0($1), $0{{$}}
2755define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
2756  store <2 x double> %v , <2 x double>* %p
2757  ret void
2758}
2759
2760; CHECK-LABEL: store_v2f64_with_folded_offset:
2761; NO-SIMD128-NOT: v128
2762; SIMD128-VM-NOT: v128
2763; SIMD128-NEXT: .functype store_v2f64_with_folded_offset (v128, i32) -> (){{$}}
2764; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2765define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
2766  %q = ptrtoint <2 x double>* %p to i32
2767  %r = add nuw i32 %q, 16
2768  %s = inttoptr i32 %r to <2 x double>*
2769  store <2 x double> %v , <2 x double>* %s
2770  ret void
2771}
2772
2773; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
2774; NO-SIMD128-NOT: v128
2775; SIMD128-VM-NOT: v128
2776; SIMD128-NEXT: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> (){{$}}
2777; SIMD128-NEXT: v128.store 16($1), $0{{$}}
2778define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
2779  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
2780  store <2 x double> %v , <2 x double>* %s
2781  ret void
2782}
2783
2784; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
2785; NO-SIMD128-NOT: v128
2786; SIMD128-VM-NOT: v128
2787; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
2788; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2789; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2790; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2791define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
2792  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
2793  store <2 x double> %v , <2 x double>* %s
2794  ret void
2795}
2796
2797; CHECK-LABEL: store_v2f64_with_unfolded_offset:
2798; NO-SIMD128-NOT: v128
2799; SIMD128-VM-NOT: v128
2800; SIMD128-NEXT: .functype store_v2f64_with_unfolded_offset (v128, i32) -> (){{$}}
2801; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
2802; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2803; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2804define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
2805  %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
2806  store <2 x double> %v , <2 x double>* %s
2807  ret void
2808}
2809
2810; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
2811; NO-SIMD128-NOT: v128
2812; SIMD128-VM-NOT: v128
2813; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> (){{$}}
2814; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
2815; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
2816; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
2817define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
2818  %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
2819  store <2 x double> %v , <2 x double>* %s
2820  ret void
2821}
2822
2823; CHECK-LABEL: store_v2f64_to_numeric_address:
2824; NO-SIMD128-NOT: v128
2825; SIMD128-VM-NOT: v128
2826; SIMD128-NEXT: .functype store_v2f64_to_numeric_address (v128) -> (){{$}}
2827; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
2828; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
2829define void @store_v2f64_to_numeric_address(<2 x double> %v) {
2830  %s = inttoptr i32 32 to <2 x double>*
2831  store <2 x double> %v , <2 x double>* %s
2832  ret void
2833}
2834
2835; CHECK-LABEL: store_v2f64_to_global_address:
2836; NO-SIMD128-NOT: v128
2837; SIMD128-VM-NOT: v128
2838; SIMD128-NEXT: .functype store_v2f64_to_global_address (v128) -> (){{$}}
2839; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
2840; SIMD128-NEXT: v128.store gv_v2f64($pop[[R]]), $0{{$}}
2841define void @store_v2f64_to_global_address(<2 x double> %v) {
2842  store <2 x double> %v , <2 x double>* @gv_v2f64
2843  ret void
2844}
2845