; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test SIMD v128.load{8,16,32,64}_lane instructions.

; TODO: Use the offset field by supporting more patterns. Right now only the
; equivalents of LoadPatNoOffset/StorePatNoOffset are supported.

target triple = "wasm32-unknown-unknown"

;===----------------------------------------------------------------------------
; v128.load8_lane / v128.store8_lane
;===----------------------------------------------------------------------------

define <16 x i8> @load_lane_i8_no_offset(i8* %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_no_offset:
; CHECK:         .functype load_lane_i8_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = load i8, i8* %p
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_folded_offset(i8* %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_folded_offset:
; CHECK:         .functype load_lane_i8_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %x = load i8, i8* %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_folded_gep_offset(i8* %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_folded_gep_offset:
; CHECK:         .functype load_lane_i8_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 6
  %x = load i8, i8* %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_gep_negative_offset(i8* %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i8_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -6
  %x = load i8, i8* %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_offset(i8* %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_offset:
; CHECK:         .functype load_lane_i8_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %x = load i8, i8* %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_gep_offset(i8* %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i8_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 6
  %x = load i8, i8* %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_from_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_from_numeric_address:
; CHECK:         .functype load_lane_i8_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i8*
  %x = load i8, i8* %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

@gv_i8 = global i8 0
define <16 x i8> @load_lane_i8_from_global_address(<16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_from_global_address:
; CHECK:         .functype load_lane_i8_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i8
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = load i8, i8* @gv_i8
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define void @store_lane_i8_no_offset(<16 x i8> %v, i8* %p) {
; CHECK-LABEL: store_lane_i8_no_offset:
; CHECK:         .functype store_lane_i8_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, i8* %p
  ret void
}

define void @store_lane_i8_with_folded_offset(<16 x i8> %v, i8* %p) {
; CHECK-LABEL: store_lane_i8_with_folded_offset:
; CHECK:         .functype store_lane_i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, i8* %s
  ret void
}

define void @store_lane_i8_with_folded_gep_offset(<16 x i8> %v, i8* %p) {
; CHECK-LABEL: store_lane_i8_with_folded_gep_offset:
; CHECK:         .functype store_lane_i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, i8* %s
  ret void
}

define void @store_lane_i8_with_unfolded_gep_negative_offset(<16 x i8> %v, i8* %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, i8* %p, i32 -6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, i8* %s
  ret void
}

define void @store_lane_i8_with_unfolded_offset(<16 x i8> %v, i8* %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_offset:
; CHECK:         .functype store_lane_i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i8* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, i8* %s
  ret void
}

define void @store_lane_i8_with_unfolded_gep_offset(<16 x i8> %v, i8* %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, i8* %p, i32 6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, i8* %s
  ret void
}

define void @store_lane_i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_lane_i8_to_numeric_address:
; CHECK:         .functype store_lane_i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i8*
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, i8* %s
  ret void
}

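;
; Illustration (comment only, not checked by FileCheck): with the current
; lowering, a constant offset such as `%p + 24` is materialized with an
; explicit i32.add in front of the lane instruction, e.g.
;
;   local.get 0
;   i32.const 24
;   i32.add
;   local.get 1
;   v128.load8_lane 0, 0
;
; A lowering that used the instruction's memarg offset field would presumably
; fold the constant instead, along the lines of `v128.load8_lane 24, 0`; that
; folded form is only a sketch here, and the autogenerated CHECK lines below
; remain the authoritative expectations.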
define void @store_lane_i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_lane_i8_to_global_address:
; CHECK:         .functype store_lane_i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i8
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, i8* @gv_i8
  ret void
}

;===----------------------------------------------------------------------------
; v128.load16_lane / v128.store16_lane
;===----------------------------------------------------------------------------

define <8 x i16> @load_lane_i16_no_offset(i16* %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_no_offset:
; CHECK:         .functype load_lane_i16_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = load i16, i16* %p
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_folded_offset(i16* %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_folded_offset:
; CHECK:         .functype load_lane_i16_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i16*
  %x = load i16, i16* %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_folded_gep_offset(i16* %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_folded_gep_offset:
; CHECK:         .functype load_lane_i16_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 6
  %x = load i16, i16* %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_gep_negative_offset(i16* %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i16_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -6
  %x = load i16, i16* %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_offset(i16* %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_offset:
; CHECK:         .functype load_lane_i16_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i16*
  %x = load i16, i16* %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_gep_offset(i16* %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i16_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 6
  %x = load i16, i16* %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_from_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_from_numeric_address:
; CHECK:         .functype load_lane_i16_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i16*
  %x = load i16, i16* %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

@gv_i16 = global i16 0
define <8 x i16> @load_lane_i16_from_global_address(<8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_from_global_address:
; CHECK:         .functype load_lane_i16_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = load i16, i16* @gv_i16
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define void @store_lane_i16_no_offset(<8 x i16> %v, i16* %p) {
; CHECK-LABEL: store_lane_i16_no_offset:
; CHECK:         .functype store_lane_i16_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, i16* %p
  ret void
}

define void @store_lane_i16_with_folded_offset(<8 x i16> %v, i16* %p) {
; CHECK-LABEL: store_lane_i16_with_folded_offset:
; CHECK:         .functype store_lane_i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i16*
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, i16* %s
  ret void
}

define void @store_lane_i16_with_folded_gep_offset(<8 x i16> %v, i16* %p) {
; CHECK-LABEL: store_lane_i16_with_folded_gep_offset:
; CHECK:         .functype store_lane_i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, i16* %s
  ret void
}

define void @store_lane_i16_with_unfolded_gep_negative_offset(<8 x i16> %v, i16* %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, i16* %p, i32 -6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, i16* %s
  ret void
}

define void @store_lane_i16_with_unfolded_offset(<8 x i16> %v, i16* %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_offset:
; CHECK:         .functype store_lane_i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i16* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i16*
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, i16* %s
  ret void
}

define void @store_lane_i16_with_unfolded_gep_offset(<8 x i16> %v, i16* %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, i16* %p, i32 6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, i16* %s
  ret void
}

define void @store_lane_i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_lane_i16_to_numeric_address:
; CHECK:         .functype store_lane_i16_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i16*
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, i16* %s
  ret void
}

define void @store_lane_i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_lane_i16_to_global_address:
; CHECK:         .functype store_lane_i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, i16* @gv_i16
  ret void
}

;===----------------------------------------------------------------------------
; v128.load32_lane / v128.store32_lane
;===----------------------------------------------------------------------------

define <4 x i32> @load_lane_i32_no_offset(i32* %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_no_offset:
; CHECK:         .functype load_lane_i32_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = load i32, i32* %p
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_folded_offset(i32* %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_folded_offset:
; CHECK:         .functype load_lane_i32_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_folded_gep_offset(i32* %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_folded_gep_offset:
; CHECK:         .functype load_lane_i32_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_gep_negative_offset(i32* %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i32_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_offset(i32* %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_offset:
; CHECK:         .functype load_lane_i32_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_gep_offset(i32* %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i32_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 6
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_from_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_from_numeric_address:
; CHECK:         .functype load_lane_i32_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i32*
  %x = load i32, i32* %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

@gv_i32 = global i32 0
define <4 x i32> @load_lane_i32_from_global_address(<4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_from_global_address:
; CHECK:         .functype load_lane_i32_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i32
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = load i32, i32* @gv_i32
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define void @store_lane_i32_no_offset(<4 x i32> %v, i32* %p) {
; CHECK-LABEL: store_lane_i32_no_offset:
; CHECK:         .functype store_lane_i32_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, i32* %p
  ret void
}

define void @store_lane_i32_with_folded_offset(<4 x i32> %v, i32* %p) {
; CHECK-LABEL: store_lane_i32_with_folded_offset:
; CHECK:         .functype store_lane_i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, i32* %s
  ret void
}

define void @store_lane_i32_with_folded_gep_offset(<4 x i32> %v, i32* %p) {
; CHECK-LABEL: store_lane_i32_with_folded_gep_offset:
; CHECK:         .functype store_lane_i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, i32* %s
  ret void
}

define void @store_lane_i32_with_unfolded_gep_negative_offset(<4 x i32> %v, i32* %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, i32* %s
  ret void
}

define void @store_lane_i32_with_unfolded_offset(<4 x i32> %v, i32* %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_offset:
; CHECK:         .functype store_lane_i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, i32* %s
  ret void
}

define void @store_lane_i32_with_unfolded_gep_offset(<4 x i32> %v, i32* %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, i32* %s
  ret void
}

define void @store_lane_i32_to_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: store_lane_i32_to_numeric_address:
; CHECK:         .functype store_lane_i32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i32*
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, i32* %s
  ret void
}

define void @store_lane_i32_to_global_address(<4 x i32> %v) {
; CHECK-LABEL: store_lane_i32_to_global_address:
; CHECK:         .functype store_lane_i32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i32
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, i32* @gv_i32
  ret void
}

;===----------------------------------------------------------------------------
; v128.load64_lane / v128.store64_lane
;===----------------------------------------------------------------------------

define <2 x i64> @load_lane_i64_no_offset(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_no_offset:
; CHECK:         .functype load_lane_i64_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = load i64, i64* %p
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_folded_offset(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_folded_offset:
; CHECK:         .functype load_lane_i64_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_folded_gep_offset(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_folded_gep_offset:
; CHECK:         .functype load_lane_i64_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 6
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_gep_negative_offset(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i64_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 -6
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_offset(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_offset:
; CHECK:         .functype load_lane_i64_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_gep_offset(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i64_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i64, i64* %p, i32 6
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_from_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_from_numeric_address:
; CHECK:         .functype load_lane_i64_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i64*
  %x = load i64, i64* %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

@gv_i64 = global i64 0
define <2 x i64> @load_lane_i64_from_global_address(<2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_from_global_address:
; CHECK:         .functype load_lane_i64_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i64
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = load i64, i64* @gv_i64
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define void @store_lane_i64_no_offset(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_no_offset:
; CHECK:         .functype store_lane_i64_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, i64* %p
  ret void
}

define void @store_lane_i64_with_folded_offset(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_with_folded_offset:
; CHECK:         .functype store_lane_i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, i64* %s
  ret void
}

define void @store_lane_i64_with_folded_gep_offset(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_with_folded_gep_offset:
; CHECK:         .functype store_lane_i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, i64* %s
  ret void
}

define void @store_lane_i64_with_unfolded_gep_negative_offset(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i32 -6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, i64* %s
  ret void
}

define void @store_lane_i64_with_unfolded_offset(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_offset:
; CHECK:         .functype store_lane_i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, i64* %s
  ret void
}

define void @store_lane_i64_with_unfolded_gep_offset(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i64, i64* %p, i32 6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, i64* %s
  ret void
}

define void @store_lane_i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_lane_i64_to_numeric_address:
; CHECK:         .functype store_lane_i64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i64*
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, i64* %s
  ret void
}

define void @store_lane_i64_to_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_lane_i64_to_global_address:
; CHECK:         .functype store_lane_i64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i64
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, i64* @gv_i64
  ret void
}