; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I

; Check that memory accesses to array elements with large offsets have those
; offsets split into a base offset, plus a smaller offset that is folded into
; the memory operation. We should also only compute that base offset once,
; since it can be shared for all memory operations in this test.
; test1: the GEPs and the stores all live in the entry block. One array is
; reached through the pointer loaded from %sp, the other is %t itself; %n is
; not used by this function.
define void @test1([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
; RV32I-LABEL: test1:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lw a0, 0(a0)
; RV32I-NEXT:    lui a2, 20
; RV32I-NEXT:    addi a2, a2, -1920
; RV32I-NEXT:    add a1, a1, a2
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    li a2, 2
; RV32I-NEXT:    sw a2, 0(a0)
; RV32I-NEXT:    li a3, 1
; RV32I-NEXT:    sw a3, 4(a0)
; RV32I-NEXT:    sw a3, 0(a1)
; RV32I-NEXT:    sw a2, 4(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test1:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    lui a2, 20
; RV64I-NEXT:    addiw a2, a2, -1920
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    li a2, 2
; RV64I-NEXT:    sw a2, 0(a0)
; RV64I-NEXT:    li a3, 1
; RV64I-NEXT:    sw a3, 4(a0)
; RV64I-NEXT:    sw a3, 0(a1)
; RV64I-NEXT:    sw a2, 4(a1)
; RV64I-NEXT:    ret
entry:
  ; Base address of the first array, loaded through %sp.
  %s = load [65536 x i32]*, [65536 x i32]** %sp
  ; i32 elements 20000 and 20001 of each array, i.e. byte offsets 80000 and
  ; 80004 from the start of the array.
  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
  %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
  %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
  ; The expected code builds 80000 once (lui 20, then addi/addiw -1920), adds
  ; it to both array bases, and addresses the four stores as 0(base)/4(base).
  store i32 2, i32* %gep0
  store i32 1, i32* %gep1
  store i32 1, i32* %gep2
  store i32 2, i32* %gep3
  ret void
}

; Same check as test1, but with the GEPs placed outside the entry block.
; test2: the GEPs sit in the loop header (%while_cond) and the stores in the
; loop body (%while_body). The loop runs while the counter %phi, which starts
; at 0 and is incremented by 1 per iteration, is signed-less-than %n.
define void @test2([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
; RV32I-LABEL: test2:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    lw a4, 0(a0)
; RV32I-NEXT:    lui a0, 20
; RV32I-NEXT:    addi a5, a0, -1920
; RV32I-NEXT:    add a0, a1, a5
; RV32I-NEXT:    add a1, a4, a5
; RV32I-NEXT:    bge a3, a2, .LBB1_2
; RV32I-NEXT:  .LBB1_1: # %while_body
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    addi a4, a3, 1
; RV32I-NEXT:    sw a4, 0(a1)
; RV32I-NEXT:    sw a3, 4(a1)
; RV32I-NEXT:    sw a4, 0(a0)
; RV32I-NEXT:    sw a3, 4(a0)
; RV32I-NEXT:    mv a3, a4
; RV32I-NEXT:    blt a3, a2, .LBB1_1
; RV32I-NEXT:  .LBB1_2: # %while_end
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test2:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    li a3, 0
; RV64I-NEXT:    ld a4, 0(a0)
; RV64I-NEXT:    lui a0, 20
; RV64I-NEXT:    addiw a5, a0, -1920
; RV64I-NEXT:    add a0, a1, a5
; RV64I-NEXT:    add a1, a4, a5
; RV64I-NEXT:    sext.w a2, a2
; RV64I-NEXT:    bge a3, a2, .LBB1_2
; RV64I-NEXT:  .LBB1_1: # %while_body
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    addiw a4, a3, 1
; RV64I-NEXT:    sw a4, 0(a1)
; RV64I-NEXT:    sw a3, 4(a1)
; RV64I-NEXT:    sw a4, 0(a0)
; RV64I-NEXT:    sw a3, 4(a0)
; RV64I-NEXT:    mv a3, a4
; RV64I-NEXT:    blt a3, a2, .LBB1_1
; RV64I-NEXT:  .LBB1_2: # %while_end
; RV64I-NEXT:    ret
entry:
  ; Base address of the first array, loaded once before the loop.
  %s = load [65536 x i32]*, [65536 x i32]** %sp
  br label %while_cond
while_cond:
  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
  ; Same element addresses as in test1 (byte offsets 80000 and 80004), but
  ; computed in a different block from the stores that use them. The expected
  ; code still forms both offset bases once, ahead of the loop.
  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
  %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
  %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
  %cmp = icmp slt i32 %phi, %n
  br i1 %cmp, label %while_body, label %while_end
while_body:
  %i = add i32 %phi, 1
  ; NOTE(review): %j has no users in this function and the expected assembly
  ; contains no instruction for it. Left in place so the autogenerated
  ; assertions stay valid without regeneration - confirm before removing.
  %j = add i32 %phi, 2
  ; Each array gets %i at element 20000 and %phi at element 20001.
  store i32 %i, i32* %gep0
  store i32 %phi, i32* %gep1
  store i32 %i, i32* %gep2
  store i32 %phi, i32* %gep3
  br label %while_cond
while_end:
  ret void
}

; GEPs have been manually split so the base GEP does not get used by any memory
; instruction. Make sure we use an offset and a common base for each of the
; stores.
; test3: the large offset has already been split by hand into a byte-wise base
; GEP (%splitgep, +80000) and two small GEPs (+4 and +8) derived from it.
define void @test3([65536 x i32]* %t) {
; RV32I-LABEL: test3:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, 20
; RV32I-NEXT:    addi a1, a1, -1920
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    li a1, 2
; RV32I-NEXT:    sw a1, 4(a0)
; RV32I-NEXT:    li a1, 3
; RV32I-NEXT:    sw a1, 8(a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test3:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a1, 20
; RV64I-NEXT:    addiw a1, a1, -1920
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    li a1, 2
; RV64I-NEXT:    sw a1, 4(a0)
; RV64I-NEXT:    li a1, 3
; RV64I-NEXT:    sw a1, 8(a0)
; RV64I-NEXT:    ret
entry:
  ; View the array as raw bytes so the offsets below are byte offsets.
  %0 = bitcast [65536 x i32]* %t to i8*
  ; Shared base at byte offset 80000; it is never itself the address operand
  ; of a store, only the parent of the two GEPs below.
  %splitgep = getelementptr i8, i8* %0, i64 80000
  %1 = getelementptr i8, i8* %splitgep, i64 4
  %2 = bitcast i8* %1 to i32*
  %3 = getelementptr i8, i8* %splitgep, i64 8
  %4 = bitcast i8* %3 to i32*
  ; The expected code computes the base once and addresses the stores as
  ; 4(base) and 8(base).
  store i32 2, i32* %2, align 4
  store i32 3, i32* %4, align 4
  ret void
}
