; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM

; Test that basic bulk memory codegen works correctly
;
; The first RUN line enables the bulk-memory feature and the second one
; disables it. Checks that must hold in both configurations use the common
; prefix; the remaining checks apply to a single configuration only.

target triple = "wasm64-unknown-unknown"

declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
declare void @llvm.memcpy.p0i32.p0i32.i64(i32*, i32*, i64, i1)

declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1)
declare void @llvm.memmove.p0i32.p0i32.i64(i32*, i32*, i64, i1)

declare void @llvm.memset.p0i8.i8(i8*, i8, i8, i1)
declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
declare void @llvm.memset.p0i32.i64(i32*, i8, i64, i1)

; Variable i8 length: with bulk memory enabled, the length is expected to be
; zero-extended to i64 and fed to the bulk instruction.

; CHECK-LABEL: memcpy_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i8 (i64, i64, i32) -> ()
; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop0
; BULK-MEM-NEXT: return
define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i8 (i64, i64, i32) -> ()
; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop0
; BULK-MEM-NEXT: return
define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i8:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i8 (i64, i32, i32) -> ()
; BULK-MEM-NEXT: i64.extend_i32_u $push0=, $2
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop0
; BULK-MEM-NEXT: return
define void @memset_i8(i8* %dest, i8 %val, i8 zeroext %len) {
  call void @llvm.memset.p0i8.i8(i8* %dest, i8 %val, i8 %len, i1 0)
  ret void
}

; Variable i64 length with i32* pointers: with bulk memory enabled, the
; arguments are expected to be passed to the bulk instruction unchanged.

; CHECK-LABEL: memcpy_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i32 (i64, i64, i64) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i32(i32* %dest, i32* %src, i64 %len) {
  call void @llvm.memcpy.p0i32.p0i32.i64(i32* %dest, i32* %src, i64 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i32 (i64, i64, i64) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i32(i32* %dest, i32* %src, i64 %len) {
  call void @llvm.memmove.p0i32.p0i32.i64(i32* %dest, i32* %src, i64 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i32:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i32 (i64, i32, i64) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i32(i32* %dest, i8 %val, i64 %len) {
  call void @llvm.memset.p0i32.i64(i32* %dest, i8 %val, i64 %len, i1 0)
  ret void
}

; Constant length of 1: a single byte load/store is expected instead of a
; bulk instruction, so these are checked identically in both configurations.

; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i64, i64) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memcpy_1(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1:
; CHECK-NEXT: .functype memmove_1 (i64, i64) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memmove_1(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1, i1 0)
  ret void
}

; Uses the common prefix, like memcpy_1 and memmove_1 above, so that the
; single-byte store is verified with and without bulk memory.
; CHECK-LABEL: memset_1:
; CHECK-NEXT: .functype memset_1 (i64, i32) -> ()
; CHECK-NEXT: i32.store8 0($0), $1
; CHECK-NEXT: return
define void @memset_1(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 1, i1 0)
  ret void
}

; Constant length of 1024: with bulk memory enabled, the length is expected
; to be materialized with i64.const and passed to the bulk instruction.

; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i64, i64) -> ()
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memcpy_1024(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1024, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i64, i64) -> ()
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memmove_1024(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1024, i1 0)
  ret void
}

; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i64, i32) -> ()
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memset_1024(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 1024, i1 0)
  ret void
}

; The following tests check that frame index elimination works for
; bulk memory instructions. The stack pointer is bumped by 112 instead
; of 100 because the stack pointer in WebAssembly is currently always
; 16-byte aligned, even in leaf functions, although it is not written
; back to the global in this case.

; TODO: Change TransientStackAlignment to 1 to avoid this extra
; arithmetic. This will require forcing the use of StackAlignment in
; PrologEpilogEmitter.cpp when
; WebAssemblyFrameLowering::needsSPWriteback would be true.

; CHECK-LABEL: memcpy_alloca_src:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i64) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(i8* %dst) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 100, i1 false)
  ret void
}

; CHECK-LABEL: memcpy_alloca_dst:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i64) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(i8* %src) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %src, i64 100, i1 false)
  ret void
}

; CHECK-LABEL: memset_alloca:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* %p, i8 %val, i64 100, i1 false)
  ret void
}