; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM

; Test that basic bulk memory codegen works correctly
;
; The module is compiled twice: once with the bulk-memory feature enabled and
; once with it disabled. Expectations shared by both configurations use the
; common prefix; feature-specific expectations use the prefix selected by the
; corresponding invocation above.

target triple = "wasm32-unknown-unknown"

; memcpy intrinsics: i8 pointers with i8 and i32 lengths, and i32 pointers
; with an i32 length.
declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
declare void @llvm.memcpy.p0i32.p0i32.i32(i32*, i32*, i32, i1)

; memmove intrinsics, mirroring the memcpy overloads above.
declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
declare void @llvm.memmove.p0i32.p0i32.i32(i32*, i32*, i32, i1)

; memset intrinsics: the fill value is always an i8; the destination pointer
; and length types vary.
declare void @llvm.memset.p0i8.i8(i8*, i8, i8, i1)
declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
declare void @llvm.memset.p0i32.i32(i32*, i8, i32, i1)

; memcpy with a variable, zero-extended i8 length. With bulk memory enabled
; the expected lowering is a single memory.copy taking the three arguments
; directly; with it disabled no memory.copy may be emitted.
; CHECK-LABEL: memcpy_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 false)
  ret void
}

; memmove with a variable, zero-extended i8 length. With bulk memory enabled
; the expected lowering is a single memory.copy, the same instruction that is
; expected for memcpy; with it disabled no memory.copy may be emitted.
; CHECK-LABEL: memmove_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 false)
  ret void
}

; memset with a variable, zero-extended i8 length. With bulk memory enabled
; the expected lowering is a single memory.fill of (dest, val, len); with it
; disabled no memory.fill may be emitted.
; CHECK-LABEL: memset_i8:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i8(i8* %dest, i8 %val, i8 zeroext %len) {
  call void @llvm.memset.p0i8.i8(i8* %dest, i8 %val, i8 %len, i1 false)
  ret void
}

; memcpy between i32 pointers with a variable i32 length. With bulk memory
; enabled the expected lowering is a single memory.copy; with it disabled no
; memory.copy may be emitted.
; CHECK-LABEL: memcpy_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i32(i32* %dest, i32* %src, i32 %len) {
  call void @llvm.memcpy.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 false)
  ret void
}

; memmove between i32 pointers with a variable i32 length. With bulk memory
; enabled the expected lowering is a single memory.copy; with it disabled no
; memory.copy may be emitted.
; CHECK-LABEL: memmove_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i32(i32* %dest, i32* %src, i32 %len) {
  call void @llvm.memmove.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 false)
  ret void
}

; memset through an i32 pointer with a variable i32 length. With bulk memory
; enabled the expected lowering is a single memory.fill; with it disabled no
; memory.fill may be emitted.
; CHECK-LABEL: memset_i32:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i32(i32* %dest, i8 %val, i32 %len) {
  call void @llvm.memset.p0i32.i32(i32* %dest, i8 %val, i32 %len, i1 false)
  ret void
}

; memcpy of a constant single byte. Both configurations expect a plain
; byte load followed by a byte store rather than a bulk memory instruction.
; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memcpy_1(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
  ret void
}

; memmove of a constant single byte. Both configurations expect a plain
; byte load followed by a byte store rather than a bulk memory instruction.
; CHECK-LABEL: memmove_1:
; CHECK-NEXT: .functype memmove_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memmove_1(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
  ret void
}

; memset of a constant single byte. With bulk memory enabled the expected
; lowering is a single byte store of the value; with it disabled the only
; requirement is that no memory.fill is emitted.
; NOTE(review): unlike memcpy_1 and memmove_1, the exact output is only pinned
; for the bulk-memory configuration. Presumably the store is produced in both
; configurations -- confirm before switching these checks to the common prefix.
; CHECK-LABEL: memset_1:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.store8 0($0), $1
; BULK-MEM-NEXT: return
define void @memset_1(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1, i1 false)
  ret void
}

; memcpy of a constant 1024 bytes. With bulk memory enabled the length is
; expected to be materialized with i32.const and passed to one memory.copy;
; with it disabled no memory.copy may be emitted.
; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memcpy_1024(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 false)
  ret void
}

; memmove of a constant 1024 bytes. With bulk memory enabled the length is
; expected to be materialized with i32.const and passed to one memory.copy;
; with it disabled no memory.copy may be emitted.
; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memmove_1024(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 false)
  ret void
}

; memset of a constant 1024 bytes. With bulk memory enabled the length is
; expected to be materialized with i32.const and passed to one memory.fill;
; with it disabled no memory.fill may be emitted.
; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memset_1024(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 false)
  ret void
}

; The following tests check that frame index elimination works for
; bulk memory instructions. The stack pointer is bumped by 112 instead
; of 100 because the stack pointer in WebAssembly is currently always
; 16-byte aligned, even in leaf functions, although it is not written
; back to the global in this case.

; TODO: Change TransientStackAlignment to 1 to avoid this extra
; arithmetic. This will require forcing the use of StackAlignment in
; PrologEpilogEmitter.cpp when
; WebAssemblyFrameLowering::needsSPWriteback would be true.

; memcpy whose source is a 100-byte stack array. With bulk memory enabled the
; source address is expected to be computed as (__stack_pointer - 112) + 12
; and passed as the source operand of memory.copy; with it disabled no
; memory.copy may be emitted.
; CHECK-LABEL: memcpy_alloca_src:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(i8* %dst) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %p, i32 100, i1 false)
  ret void
}

; memcpy whose destination is a 100-byte stack array. With bulk memory enabled
; the destination address is expected to be computed as
; (__stack_pointer - 112) + 12 and passed as the destination operand of
; memory.copy; with it disabled no memory.copy may be emitted.
; CHECK-LABEL: memcpy_alloca_dst:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(i8* %src) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %src, i32 100, i1 false)
  ret void
}

; memset of a 100-byte stack array. With bulk memory enabled the destination
; address is expected to be computed as (__stack_pointer - 112) + 12 and
; passed as the destination operand of memory.fill; with it disabled no
; memory.fill may be emitted.
; CHECK-LABEL: memset_alloca:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memset.p0i8.i32(i8* %p, i8 %val, i32 100, i1 false)
  ret void
}
