1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
3
4; Test SIMD v128.load{32,64}_zero instructions
5
6target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
7target triple = "wasm32-unknown-unknown"
8
9declare <4 x i32> @llvm.wasm.load32.zero(i32*)
10declare <2 x i64> @llvm.wasm.load64.zero(i64*)
11
12;===----------------------------------------------------------------------------
13; v128.load32_zero
14;===----------------------------------------------------------------------------
15
; Baseline: the pointer is used directly, so the load carries a static
; offset immediate of 0.
define <4 x i32> @load_zero_i32_no_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_no_offset:
; CHECK:         .functype load_zero_i32_no_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    # fallthrough-return
  %v = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
  ret <4 x i32> %v
}
26
; An `add nuw` of a constant to the pointer is folded into the load's
; static offset immediate (`v128.load32_zero 24`), since `nuw` guarantees
; the unsigned address arithmetic cannot wrap.
define <4 x i32> @load_zero_i32_with_folded_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_folded_offset:
; CHECK:         .functype load_zero_i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 24
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
40
; An `inbounds` GEP with a positive constant index is folded into the
; static offset: index 6 scaled by 4-byte i32 elements gives offset 24.
define <4 x i32> @load_zero_i32_with_folded_gep_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_folded_gep_offset:
; CHECK:         .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 24
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
52
; A negative GEP index cannot be folded (the load's offset immediate is
; unsigned), so the backend emits an explicit i32.add of -24 and a load
; with offset 0.
define <4 x i32> @load_zero_i32_with_unfolded_gep_negative_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zero_i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
66
; Unlike the `nuw` case above, an `add nsw` may wrap in the unsigned
; sense, so the constant is NOT folded into the offset immediate; an
; explicit i32.add is emitted instead.
define <4 x i32> @load_zero_i32_with_unfolded_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_unfolded_offset:
; CHECK:         .functype load_zero_i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
82
; A GEP without `inbounds` provides no overflow guarantee, so the scaled
; offset (6 * 4 = 24) is not folded; an explicit i32.add is emitted.
define <4 x i32> @load_zero_i32_with_unfolded_gep_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_unfolded_gep_offset:
; CHECK:         .functype load_zero_i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 6
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
96
; A constant absolute address is materialized as base `i32.const 0` with
; the address folded into the static offset immediate (42).
define <4 x i32> @load_zero_i32_from_numeric_address() {
; CHECK-LABEL: load_zero_i32_from_numeric_address:
; CHECK:         .functype load_zero_i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_zero 42
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i32*
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
108
; Loading from a global: the symbol's address is used as the offset
; operand (relocated at link time) on top of base `i32.const 0`.
@gv_i32 = global i32 0
define <4 x i32> @load_zero_i32_from_global_address() {
; CHECK-LABEL: load_zero_i32_from_global_address:
; CHECK:         .functype load_zero_i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_zero gv_i32
; CHECK-NEXT:    # fallthrough-return
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* @gv_i32)
  ret <4 x i32> %t
}
120
121;===----------------------------------------------------------------------------
122; v128.load64_zero
123;===----------------------------------------------------------------------------
124
; Baseline for the 64-bit variant: direct pointer use, static offset 0.
define <2 x i64> @load_zero_i64_no_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_no_offset:
; CHECK:         .functype load_zero_i64_no_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %v = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %p)
  ret <2 x i64> %v
}
135
; `add nuw` of a constant folds into the load's static offset immediate
; (`v128.load64_zero 24`), mirroring the 32-bit folded-offset test.
define <2 x i64> @load_zero_i64_with_folded_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_folded_offset:
; CHECK:         .functype load_zero_i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 24
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
149
; `inbounds` GEP folds into the static offset: index 6 scaled by 8-byte
; i64 elements gives offset 48.
define <2 x i64> @load_zero_i64_with_folded_gep_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_folded_gep_offset:
; CHECK:         .functype load_zero_i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 48
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i64 6
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
161
; Negative GEP index (-6 * 8 = -48) cannot be folded into the unsigned
; offset immediate; an explicit i32.add is emitted instead.
define <2 x i64> @load_zero_i64_with_unfolded_gep_negative_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zero_i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i64 -6
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
175
; `add nsw` (no unsigned-wrap guarantee) is not folded; the constant is
; added explicitly and the load uses offset 0.
define <2 x i64> @load_zero_i64_with_unfolded_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_unfolded_offset:
; CHECK:         .functype load_zero_i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
191
; GEP without `inbounds` is not folded; the scaled offset (6 * 8 = 48)
; is added explicitly.
define <2 x i64> @load_zero_i64_with_unfolded_gep_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_unfolded_gep_offset:
; CHECK:         .functype load_zero_i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i64, i64* %p, i64 6
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
205
; Constant absolute address: base `i32.const 0` with the address (42)
; folded into the static offset immediate.
define <2 x i64> @load_zero_i64_from_numeric_address() {
; CHECK-LABEL: load_zero_i64_from_numeric_address:
; CHECK:         .functype load_zero_i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero 42
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i64*
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
217
; Loading from a global: the symbol's address serves as the offset
; operand (resolved by the linker) on top of base `i32.const 0`.
@gv_i64 = global i64 0
define <2 x i64> @load_zero_i64_from_global_address() {
; CHECK-LABEL: load_zero_i64_from_global_address:
; CHECK:         .functype load_zero_i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero gv_i64
; CHECK-NEXT:    # fallthrough-return
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* @gv_i64)
  ret <2 x i64> %t
}
229