; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM

; Test that basic bulk memory codegen works correctly

; The file is compiled twice (see the two llc invocations above): once with
; -mattr=+bulk-memory, verified with the CHECK and BULK-MEM prefixes, and once
; with -mattr=-bulk-memory, verified with the CHECK and NO-BULK-MEM prefixes.

target triple = "wasm32-unknown-unknown"

declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
declare void @llvm.memcpy.p0i32.p0i32.i32(i32*, i32*, i32, i1)

declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1)
declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
declare void @llvm.memmove.p0i32.p0i32.i32(i32*, i32*, i32, i1)

declare void @llvm.memset.p0i8.i8(i8*, i8, i8, i1)
declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
declare void @llvm.memset.p0i32.i32(i32*, i8, i32, i1)

; Variable-length operations on i8* pointers whose length is an i8 argument
; marked zeroext. With bulk memory enabled each function is expected to be a
; single memory.copy / memory.fill followed by a return.

; CHECK-LABEL: memcpy_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) {
  call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i8:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i8(i8* %dest, i8 %val, i8 zeroext %len) {
  call void @llvm.memset.p0i8.i8(i8* %dest, i8 %val, i8 %len, i1 0)
  ret void
}

; Variable-length operations on i32* pointers with an i32 length argument.

; CHECK-LABEL: memcpy_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memcpy_i32(i32* %dest, i32* %src, i32 %len) {
  call void @llvm.memcpy.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memmove_i32(i32* %dest, i32* %src, i32 %len) {
  call void @llvm.memmove.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i32:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: return
define void @memset_i32(i32* %dest, i8 %val, i32 %len) {
  call void @llvm.memset.p0i32.i32(i32* %dest, i8 %val, i32 %len, i1 0)
  ret void
}

; Constant length of 1. The checks below expect a plain byte load/store pair
; (memcpy, memmove) or a single byte store (memset) instead of a bulk memory
; instruction.

; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memcpy_1(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1:
; CHECK-NEXT: .functype memmove_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memmove_1(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 0)
  ret void
}

; CHECK-LABEL: memset_1:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.store8 0($0), $1
; BULK-MEM-NEXT: return
define void @memset_1(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1, i1 0)
  ret void
}

; Constant length of 1024. With bulk memory enabled the length is expected to
; be materialized with i32.const and passed to memory.copy / memory.fill.

; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memcpy_1024(i8* %dest, i8* %src) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memmove_1024(i8* %dest, i8* %src) {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0)
  ret void
}

; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: return
define void @memset_1024(i8* %dest, i8 %val) {
  call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 0)
  ret void
}

; The following tests check that frame index elimination works for
; bulk memory instructions. The stack pointer is bumped by 112 instead
; of 100 because the stack pointer in WebAssembly is currently always
; 16-byte aligned, even in leaf functions, although it is not written
; back to the global in this case.

; TODO: Change TransientStackAlignment to 1 to avoid this extra
; arithmetic. This will require forcing the use of StackAlignment in
; PrologEpilogEmitter.cpp when
; WebAssemblyFrameLowering::needsSPWriteback would be true.

; CHECK-LABEL: memcpy_alloca_src:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(i8* %dst) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %p, i32 100, i1 false)
  ret void
}

; CHECK-LABEL: memcpy_alloca_dst:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(i8* %src) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %src, i32 100, i1 false)
  ret void
}

; CHECK-LABEL: memset_alloca:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
  %a = alloca [100 x i8]
  %p = bitcast [100 x i8]* %a to i8*
  call void @llvm.memset.p0i8.i32(i8* %p, i8 %val, i32 100, i1 false)
  ret void
}