; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I

; Accesses to array elements at large offsets should have the offset split
; into a base part plus a small remainder that is folded into the memory
; operation itself. The base part must be materialised only once, because
; every memory operation in this test can share it.
;
; Element 20000 of an i32 array sits at byte 80000 = (20 << 12) - 1920, which
; is why the expected code pairs "lui 20" with store offsets -1920 and -1916.
define void @test1([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
; RV32I-LABEL: test1:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lw a0, 0(a0)
; RV32I-NEXT:    lui a2, 20
; RV32I-NEXT:    add a1, a1, a2
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    li a2, 2
; RV32I-NEXT:    sw a2, -1920(a0)
; RV32I-NEXT:    li a3, 1
; RV32I-NEXT:    sw a3, -1916(a0)
; RV32I-NEXT:    sw a3, -1920(a1)
; RV32I-NEXT:    sw a2, -1916(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test1:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    lui a2, 20
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    li a2, 2
; RV64I-NEXT:    sw a2, -1920(a0)
; RV64I-NEXT:    li a3, 1
; RV64I-NEXT:    sw a3, -1916(a0)
; RV64I-NEXT:    sw a3, -1920(a1)
; RV64I-NEXT:    sw a2, -1916(a1)
; RV64I-NEXT:    ret
entry:
  ; First array is reached through one level of indirection via %sp.
  %arr = load [65536 x i32]*, [65536 x i32]** %sp
  ; Two adjacent far-away elements in each of the two arrays.
  %arr.elt0 = getelementptr [65536 x i32], [65536 x i32]* %arr, i64 0, i32 20000
  %arr.elt1 = getelementptr [65536 x i32], [65536 x i32]* %arr, i64 0, i32 20001
  %t.elt0 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
  %t.elt1 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
  store i32 2, i32* %arr.elt0
  store i32 1, i32* %arr.elt1
  store i32 1, i32* %t.elt0
  store i32 2, i32* %t.elt1
  ret void
}

; Same check as above, but with the GEPs placed outside the entry block.
define void @test2([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
; RV32I-LABEL: test2:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    lw a4, 0(a0)
; RV32I-NEXT:    lui a0, 20
; RV32I-NEXT:    addi a5, a0, -1920
; RV32I-NEXT:    add a0, a1, a5
; RV32I-NEXT:    add a1, a4, a5
; RV32I-NEXT:    bge a3, a2, .LBB1_2
; RV32I-NEXT:  .LBB1_1: # %while_body
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    addi a4, a3, 1
; RV32I-NEXT:    sw a4, 0(a1)
; RV32I-NEXT:    sw a3, 4(a1)
; RV32I-NEXT:    sw a4, 0(a0)
; RV32I-NEXT:    sw a3, 4(a0)
; RV32I-NEXT:    mv a3, a4
; RV32I-NEXT:    blt a3, a2, .LBB1_1
; RV32I-NEXT:  .LBB1_2: # %while_end
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test2:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    li a3, 0
; RV64I-NEXT:    ld a4, 0(a0)
; RV64I-NEXT:    lui a0, 20
; RV64I-NEXT:    addiw a5, a0, -1920
; RV64I-NEXT:    add a0, a1, a5
; RV64I-NEXT:    add a1, a4, a5
; RV64I-NEXT:    sext.w a2, a2
; RV64I-NEXT:    bge a3, a2, .LBB1_2
; RV64I-NEXT:  .LBB1_1: # %while_body
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    addiw a4, a3, 1
; RV64I-NEXT:    sw a4, 0(a1)
; RV64I-NEXT:    sw a3, 4(a1)
; RV64I-NEXT:    sw a4, 0(a0)
; RV64I-NEXT:    sw a3, 4(a0)
; RV64I-NEXT:    mv a3, a4
; RV64I-NEXT:    blt a3, a2, .LBB1_1
; RV64I-NEXT:  .LBB1_2: # %while_end
; RV64I-NEXT:    ret
entry:
  ; First array is reached through one level of indirection via %sp.
  %s = load [65536 x i32]*, [65536 x i32]** %sp
  br label %while_cond
while_cond:
  ; Loop counter: 0 on entry, %phi + 1 on every back edge.
  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
  ; The GEPs deliberately live in the loop header rather than in the entry
  ; block. Element 20000 is byte 80000 = (20 << 12) - 1920, so the expected
  ; code builds that constant once before the loop and stores at 0 and 4.
  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
  %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
  %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
  %cmp = icmp slt i32 %phi, %n
  br i1 %cmp, label %while_body, label %while_end
while_body:
  %i = add i32 %phi, 1
  store i32 %i, i32* %gep0
  store i32 %phi, i32* %gep1
  store i32 %i, i32* %gep2
  store i32 %phi, i32* %gep3
  br label %while_cond
while_end:
  ret void
}

; GEPs have been manually split so the base GEP does not get used by any memory
; instructions. Make sure we use an offset and common base for each of the
; stores.
define void @test3([65536 x i32]* %t) {
; RV32I-LABEL: test3:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, 20
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    li a1, 2
; RV32I-NEXT:    sw a1, -1916(a0)
; RV32I-NEXT:    li a1, 3
; RV32I-NEXT:    sw a1, -1912(a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test3:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a1, 20
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    li a1, 2
; RV64I-NEXT:    sw a1, -1916(a0)
; RV64I-NEXT:    li a1, 3
; RV64I-NEXT:    sw a1, -1912(a0)
; RV64I-NEXT:    ret
entry:
  %0 = bitcast [65536 x i32]* %t to i8*
  ; Shared base at byte 80000; it is only ever used to derive other addresses.
  %splitgep = getelementptr i8, i8* %0, i64 80000
  ; Byte 80004 = (20 << 12) - 1916.
  %1 = getelementptr i8, i8* %splitgep, i64 4
  %2 = bitcast i8* %1 to i32*
  ; Byte 80008 = (20 << 12) - 1912.
  %3 = getelementptr i8, i8* %splitgep, i64 8
  %4 = bitcast i8* %3 to i32*
  store i32 2, i32* %2, align 4
  store i32 3, i32* %4, align 4
  ret void
}