; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM

; Test that basic bulk memory codegen works correctly on wasm64.
; The file is compiled twice: once with the bulk-memory feature enabled and
; once with it disabled. Each configuration is verified with its own check
; prefix in addition to the shared one.

target triple = "wasm64-unknown-unknown"

; memcpy overloads used below: i8 and i64 lengths on i8*, i64 length on i32*.
declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
declare void @llvm.memcpy.p0i32.p0i32.i64(i32*, i32*, i64, i1)

; memmove overloads, mirroring the memcpy ones.
declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1)
declare void @llvm.memmove.p0i32.p0i32.i64(i32*, i32*, i64, i1)

; memset overloads: i8 and i64 lengths on i8*, i64 length on i32*.
declare void @llvm.memset.p0i8.i8(i8*, i8, i8, i1)
declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
declare void @llvm.memset.p0i32.i64(i32*, i8, i64, i1)

; memcpy with an i8 length. With bulk memory enabled, the length is expected
; to be zero-extended to i64 and passed to a single memory.copy. With bulk
; memory disabled, no memory.copy may be emitted.
; CHECK-LABEL: memcpy_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i8 (i64, i64, i32) -> ()
; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop0
; BULK-MEM-NEXT: return
define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
  ret void
}

; memmove with an i8 length. With bulk memory enabled, the length is expected
; to be zero-extended to i64 and passed to a single memory.copy. With bulk
; memory disabled, no memory.copy may be emitted.
; CHECK-LABEL: memmove_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i8 (i64, i64, i32) -> ()
; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop0
; BULK-MEM-NEXT: return
define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
  ret void
}

; memset with an i8 length. With bulk memory enabled, the length is expected
; to be zero-extended to i64 and passed to a single memory.fill. With bulk
; memory disabled, no memory.fill may be emitted.
; CHECK-LABEL: memset_i8:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i8 (i64, i32, i32) -> ()
; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop0
; BULK-MEM-NEXT: return
define void @memset_i8(i8* %dest, i8 %val, i8 zeroext %len) {
  call void @llvm.memset.p0i8.i8(i8* %dest, i8 %val, i8 %len, i1 0)
  ret void
}

; memcpy on i32* operands with an i64 length. With bulk memory enabled, the
; length is expected to be passed to memory.copy directly, with no extension.
; With bulk memory disabled, no memory.copy may be emitted.
; CHECK-LABEL: memcpy_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i32 (i64, i64, i64) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i32(i32* %dest, i32* %src, i64 %len) {
  call void @llvm.memcpy.p0i32.p0i32.i64(i32* %dest, i32* %src, i64 %len, i1 0)
  ret void
}

; memmove on i32* operands with an i64 length. With bulk memory enabled, the
; length is expected to be passed to memory.copy directly, with no extension.
; With bulk memory disabled, no memory.copy may be emitted.
; CHECK-LABEL: memmove_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i32 (i64, i64, i64) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i32(i32* %dest, i32* %src, i64 %len) {
  call void @llvm.memmove.p0i32.p0i32.i64(i32* %dest, i32* %src, i64 %len, i1 0)
  ret void
}

; memset on an i32* destination with an i64 length. With bulk memory enabled,
; the length is expected to be passed to memory.fill directly, with no
; extension. With bulk memory disabled, no memory.fill may be emitted.
; CHECK-LABEL: memset_i32:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i32 (i64, i32, i64) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i32(i32* %dest, i8 %val, i64 %len) {
  call void @llvm.memset.p0i32.i64(i32* %dest, i8 %val, i64 %len, i1 0)
  ret void
}

; Constant 1-byte memcpy. In both configurations the expected code is a
; single byte load followed by a byte store, not a bulk memory instruction.
; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i64, i64) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memcpy_1(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1, i1 0)
  ret void
}

; Constant 1-byte memmove. In both configurations the expected code is a
; single byte load followed by a byte store, not a bulk memory instruction.
; CHECK-LABEL: memmove_1:
; CHECK-NEXT: .functype memmove_1 (i64, i64) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memmove_1(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1, i1 0)
  ret void
}

; Constant 1-byte memset. With bulk memory enabled, the expected code is a
; single i32.store8 rather than memory.fill. With bulk memory disabled, only
; the absence of memory.fill is checked.
; CHECK-LABEL: memset_1:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1 (i64, i32) -> ()
; BULK-MEM-NEXT: i32.store8 0($0), $1
; BULK-MEM-NEXT: return
define void @memset_1(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 1, i1 0)
  ret void
}

; Constant 1024-byte memcpy. With bulk memory enabled, the length is expected
; to be materialized with i64.const and passed to a single memory.copy. With
; bulk memory disabled, no memory.copy may be emitted.
; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i64, i64) -> ()
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memcpy_1024(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1024, i1 0)
  ret void
}

; Constant 1024-byte memmove. With bulk memory enabled, the length is expected
; to be materialized with i64.const and passed to a single memory.copy. With
; bulk memory disabled, no memory.copy may be emitted.
; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i64, i64) -> ()
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memmove_1024(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1024, i1 0)
  ret void
}

; Constant 1024-byte memset. With bulk memory enabled, the length is expected
; to be materialized with i64.const and passed to a single memory.fill. With
; bulk memory disabled, no memory.fill may be emitted.
; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i64, i32) -> ()
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memset_1024(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 1024, i1 0)
  ret void
}

; The following tests check that frame index elimination works for
; bulk memory instructions. The stack pointer is bumped by 112 instead
; of 100 because the stack pointer in WebAssembly is currently always
; 16-byte aligned, even in leaf functions, although it is not written
; back to the global in this case. The 100-byte buffer is therefore
; addressed at offset 12 from the adjusted stack pointer (112 - 100).

; TODO: Change TransientStackAlignment to 1 to avoid this extra
; arithmetic. This will require forcing the use of StackAlignment in
; PrologEpilogInserter.cpp when
; WebAssemblyFrameLowering::needsSPWriteback would be true.

; memcpy whose source is a 100-byte alloca. With bulk memory enabled, the
; source address is expected to be computed as (__stack_pointer - 112) + 12
; and fed straight into memory.copy as the source operand. With bulk memory
; disabled, no memory.copy may be emitted.
; CHECK-LABEL: memcpy_alloca_src:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i64) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(i8* %dst) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 100, i1 false)
  ret void
}

; memcpy whose destination is a 100-byte alloca. With bulk memory enabled, the
; destination address is expected to be computed as
; (__stack_pointer - 112) + 12 and fed straight into memory.copy as the
; destination operand. With bulk memory disabled, no memory.copy may be
; emitted.
; CHECK-LABEL: memcpy_alloca_dst:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i64) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(i8* %src) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %src, i64 100, i1 false)
  ret void
}

; memset whose destination is a 100-byte alloca. With bulk memory enabled, the
; destination address is expected to be computed as
; (__stack_pointer - 112) + 12 and fed straight into memory.fill. With bulk
; memory disabled, no memory.fill may be emitted.
; CHECK-LABEL: memset_alloca:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* %p, i8 %val, i64 100, i1 false)
  ret void
}
