1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
3
4; Test SIMD v128.load{32,64}_zero instructions
5
6target triple = "wasm32-unknown-unknown"
7
8declare <4 x i32> @llvm.wasm.load32.zero(i32*)
9declare <2 x i64> @llvm.wasm.load64.zero(i64*)
10
11;===----------------------------------------------------------------------------
12; v128.load32_zero
13;===----------------------------------------------------------------------------
14
; Baseline: loading straight through the incoming pointer uses local 0 as the
; base address with a zero immediate offset.
define <4 x i32> @load_zero_i32_no_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_no_offset:
; CHECK:         .functype load_zero_i32_no_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 0:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %v = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
  ret <4 x i32> %v
}
25
; A constant added to the address with `nuw` (no unsigned wrap) is folded into
; the load's immediate offset field (24) instead of emitting a separate add.
define <4 x i32> @load_zero_i32_with_folded_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_folded_offset:
; CHECK:         .functype load_zero_i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 24:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
39
; An `inbounds` GEP with a constant index folds into the immediate offset:
; 6 elements * 4 bytes = 24.
define <4 x i32> @load_zero_i32_with_folded_gep_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_folded_gep_offset:
; CHECK:         .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 24:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
51
; A negative GEP offset is not folded (Wasm memory-operand offsets are
; unsigned immediates); the -24 is added explicitly and the load keeps a
; zero immediate offset.
define <4 x i32> @load_zero_i32_with_unfolded_gep_negative_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zero_i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
65
; `nsw` (unlike `nuw` above) does not prove the unsigned address computation
; cannot wrap, so the constant is not folded: an explicit i32.add is emitted
; and the load's immediate offset stays 0.
define <4 x i32> @load_zero_i32_with_unfolded_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_unfolded_offset:
; CHECK:         .functype load_zero_i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
81
; Without `inbounds` the GEP is not known to be non-wrapping, so the offset
; is not folded: the 24 is added explicitly, zero immediate offset on the load.
define <4 x i32> @load_zero_i32_with_unfolded_gep_offset(i32* %p) {
; CHECK-LABEL: load_zero_i32_with_unfolded_gep_offset:
; CHECK:         .functype load_zero_i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i32, i32* %p, i32 6
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
95
; A constant address from inttoptr is selected as immediate offset 42 on top
; of a constant-0 base address.
define <4 x i32> @load_zero_i32_from_numeric_address() {
; CHECK-LABEL: load_zero_i32_from_numeric_address:
; CHECK:         .functype load_zero_i32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_zero 42:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i32*
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
  ret <4 x i32> %t
}
107
; A global's address is selected as a symbolic immediate offset (gv_i32) on
; top of a constant-0 base address.
@gv_i32 = global i32 0
define <4 x i32> @load_zero_i32_from_global_address() {
; CHECK-LABEL: load_zero_i32_from_global_address:
; CHECK:         .functype load_zero_i32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_zero gv_i32:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* @gv_i32)
  ret <4 x i32> %t
}
119
120;===----------------------------------------------------------------------------
121; v128.load64_zero
122;===----------------------------------------------------------------------------
123
; Baseline for the 64-bit variant: pointer used directly, zero immediate offset.
define <2 x i64> @load_zero_i64_no_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_no_offset:
; CHECK:         .functype load_zero_i64_no_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 0:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %v = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %p)
  ret <2 x i64> %v
}
134
; A constant added with `nuw` folds into the load's immediate offset (24),
; mirroring the i32 case above.
define <2 x i64> @load_zero_i64_with_folded_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_folded_offset:
; CHECK:         .functype load_zero_i64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 24:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
148
; An `inbounds` GEP with a constant index folds into the immediate offset:
; 6 elements * 8 bytes = 48.
define <2 x i64> @load_zero_i64_with_folded_gep_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_folded_gep_offset:
; CHECK:         .functype load_zero_i64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 48:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i64 6
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
160
; A negative GEP offset is not folded (Wasm memory-operand offsets are
; unsigned immediates); the -48 is added explicitly, zero immediate offset.
define <2 x i64> @load_zero_i64_with_unfolded_gep_negative_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zero_i64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, i64* %p, i64 -6
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
174
; `nsw` does not prove the unsigned address computation cannot wrap, so the
; constant is not folded: explicit i32.add, zero immediate offset.
define <2 x i64> @load_zero_i64_with_unfolded_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_unfolded_offset:
; CHECK:         .functype load_zero_i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
190
; Without `inbounds` the GEP is not known to be non-wrapping, so the 48-byte
; offset is added explicitly rather than folded into the load.
define <2 x i64> @load_zero_i64_with_unfolded_gep_offset(i64* %p) {
; CHECK-LABEL: load_zero_i64_with_unfolded_gep_offset:
; CHECK:         .functype load_zero_i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i64, i64* %p, i64 6
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
204
; A constant address from inttoptr is selected as immediate offset 42 on top
; of a constant-0 base address.
define <2 x i64> @load_zero_i64_from_numeric_address() {
; CHECK-LABEL: load_zero_i64_from_numeric_address:
; CHECK:         .functype load_zero_i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero 42:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to i64*
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
  ret <2 x i64> %t
}
216
; A global's address is selected as a symbolic immediate offset (gv_i64) on
; top of a constant-0 base address.
@gv_i64 = global i64 0
define <2 x i64> @load_zero_i64_from_global_address() {
; CHECK-LABEL: load_zero_i64_from_global_address:
; CHECK:         .functype load_zero_i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero gv_i64:p2align=0
; CHECK-NEXT:    # fallthrough-return
  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* @gv_i64)
  ret <2 x i64> %t
}
228