1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
3
4%S = type { i32, i32, i32, i32 } ; four i32 fields; each test compares them field by field between %a and %b
5
6declare void @foo(...) ; external callee used as the "other work" call inside the entry blocks below
7
8; We can split %entry and create a memcmp(16 bytes).
9define zeroext i1 @opeq1(
10; X86-LABEL: @opeq1(
11; X86-NEXT:  "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
12; X86-NEXT:    call void (...) @foo() #[[ATTR2:[0-9]+]]
13; X86-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
14; X86-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
15; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
16; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
17; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
18; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
19; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
20; X86:       opeq1.exit:
21; X86-NEXT:    ret i1 [[TMP2]]
22;
23; Make sure this call is moved to the beginning of the merged entry block, ahead of the memcmp.
24  %S* nocapture readonly dereferenceable(16) %a,
25  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
26entry:
27  %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
28  %0 = load i32, i32* %first.i, align 4
29  %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0
30  %1 = load i32, i32* %first1.i, align 4
31  ; Does other work. The call is inaccessiblememonly, so it cannot clobber the loads above.
32  call void (...) @foo() inaccessiblememonly
33  %cmp.i = icmp eq i32 %0, %1
34  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
35
36land.rhs.i:
37  %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
38  %2 = load i32, i32* %second.i, align 4
39  %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1
40  %3 = load i32, i32* %second2.i, align 4
41  %cmp2.i = icmp eq i32 %2, %3
42  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit
43
44land.rhs.i.2:
45  %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2
46  %4 = load i32, i32* %third.i, align 4
47  %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 2
48  %5 = load i32, i32* %third2.i, align 4
49  %cmp3.i = icmp eq i32 %4, %5
50  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit
51
52land.rhs.i.3:
53  %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3
54  %6 = load i32, i32* %fourth.i, align 4
55  %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3
56  %7 = load i32, i32* %fourth2.i, align 4
57  %cmp4.i = icmp eq i32 %6, %7
58  br label %opeq1.exit
59
60opeq1.exit:
61  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i] , [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
62  ret i1 %8
63}
64
65
66; We will not be able to merge anything, make sure the call is not moved out.
67define zeroext i1 @opeq1_discontiguous(
68; X86-LABEL: @opeq1_discontiguous(
69; X86-NEXT:  entry:
70; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 1
71; X86-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
72; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
73; X86-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
74; X86-NEXT:    call void (...) @foo() #[[ATTR2]]
75; X86-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
76; X86-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
77; X86:       land.rhs.i:
78; X86-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2
79; X86-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
80; X86-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1
81; X86-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
82; X86-NEXT:    [[CMP2_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
83; X86-NEXT:    br i1 [[CMP2_I]], label [[LAND_RHS_I_2:%.*]], label [[OPEQ1_EXIT]]
84; X86:       land.rhs.i.2:
85; X86-NEXT:    [[THIRD_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2
86; X86-NEXT:    [[TMP4:%.*]] = load i32, i32* [[THIRD_I]], align 4
87; X86-NEXT:    [[THIRD2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3
88; X86-NEXT:    [[TMP5:%.*]] = load i32, i32* [[THIRD2_I]], align 4
89; X86-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
90; X86-NEXT:    br i1 [[CMP3_I]], label [[LAND_RHS_I_3:%.*]], label [[OPEQ1_EXIT]]
91; X86:       land.rhs.i.3:
92; X86-NEXT:    [[FOURTH_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1
93; X86-NEXT:    [[TMP6:%.*]] = load i32, i32* [[FOURTH_I]], align 4
94; X86-NEXT:    [[FOURTH2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3
95; X86-NEXT:    [[TMP7:%.*]] = load i32, i32* [[FOURTH2_I]], align 4
96; X86-NEXT:    [[CMP4_I:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]]
97; X86-NEXT:    br label [[OPEQ1_EXIT]]
98; X86:       opeq1.exit:
99; X86-NEXT:    [[TMP8:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ false, [[LAND_RHS_I_2]] ], [ [[CMP4_I]], [[LAND_RHS_I_3]] ]
100; X86-NEXT:    ret i1 [[TMP8]]
101;
102; Make sure this call is not moved: nothing is merged, so the entry block is left unchanged.
103  %S* nocapture readonly dereferenceable(16) %a,
104  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
105entry:
106  %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
107  %0 = load i32, i32* %first.i, align 4
108  %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0
109  %1 = load i32, i32* %first1.i, align 4
110  ; Does other work.
111  call void (...) @foo() inaccessiblememonly
112  %cmp.i = icmp eq i32 %0, %1
113  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
114
115land.rhs.i:
116  %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2
117  %2 = load i32, i32* %second.i, align 4
118  %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1
119  %3 = load i32, i32* %second2.i, align 4
120  %cmp2.i = icmp eq i32 %2, %3
121  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit
122
123land.rhs.i.2:
124  %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2
125  %4 = load i32, i32* %third.i, align 4
126  %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3
127  %5 = load i32, i32* %third2.i, align 4
128  %cmp3.i = icmp eq i32 %4, %5
129  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit
130
131land.rhs.i.3:
132  %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
133  %6 = load i32, i32* %fourth.i, align 4
134  %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3
135  %7 = load i32, i32* %fourth2.i, align 4
136  %cmp4.i = icmp eq i32 %6, %7
137  br label %opeq1.exit
138
139opeq1.exit:
140  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i] , [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
141  ret i1 %8
142}
143
144; The call happens before the loads, so it cannot clobber them.
145define zeroext i1 @opeq1_call_before_loads(
146; X86-LABEL: @opeq1_call_before_loads(
147; X86-NEXT:  "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
148; X86-NEXT:    call void (...) @foo()
149; X86-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
150; X86-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
151; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
152; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
153; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
154; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
155; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
156; X86:       opeq1.exit:
157; X86-NEXT:    ret i1 [[TMP2]]
158;
159; The call is already the first instruction of the entry block; make sure it stays ahead of the merged memcmp.
160  %S* nocapture readonly dereferenceable(16) %a,
161  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
162entry:
163  call void (...) @foo()
164  %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
165  %0 = load i32, i32* %first.i, align 4
166  %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0
167  %1 = load i32, i32* %first1.i, align 4
168  %cmp.i = icmp eq i32 %0, %1
169  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
170
171land.rhs.i:
172  %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
173  %2 = load i32, i32* %second.i, align 4
174  %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1
175  %3 = load i32, i32* %second2.i, align 4
176  %cmp2.i = icmp eq i32 %2, %3
177  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit
178
179land.rhs.i.2:
180  %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2
181  %4 = load i32, i32* %third.i, align 4
182  %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 2
183  %5 = load i32, i32* %third2.i, align 4
184  %cmp3.i = icmp eq i32 %4, %5
185  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit
186
187land.rhs.i.3:
188  %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3
189  %6 = load i32, i32* %fourth.i, align 4
190  %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3
191  %7 = load i32, i32* %fourth2.i, align 4
192  %cmp4.i = icmp eq i32 %6, %7
193  br label %opeq1.exit
194
195opeq1.exit:
196  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i] , [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
197  ret i1 %8
198}
199
200; Call happens after the loads, and may clobber them.
201define zeroext i1 @opeq1_call_after_loads(
202; X86-LABEL: @opeq1_call_after_loads(
203; X86-NEXT:  entry:
204; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
205; X86-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
206; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
207; X86-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
208; X86-NEXT:    call void (...) @foo()
209; X86-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
210; X86-NEXT:    br i1 [[CMP_I]], label %"land.rhs.i+land.rhs.i.2+land.rhs.i.3", label [[OPEQ1_EXIT:%.*]]
211; X86:       "land.rhs.i+land.rhs.i.2+land.rhs.i.3":
212; X86-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1
213; X86-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1
214; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP2]] to i8*
215; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP3]] to i8*
216; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 12)
217; X86-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0
218; X86-NEXT:    br label [[OPEQ1_EXIT]]
219; X86:       opeq1.exit:
220; X86-NEXT:    [[TMP5:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TMP4]], %"land.rhs.i+land.rhs.i.2+land.rhs.i.3" ]
221; X86-NEXT:    ret i1 [[TMP5]]
222;
223; Make sure this call is not moved: the entry block is kept as is and only the other three blocks are merged.
224  %S* nocapture readonly dereferenceable(16) %a,
225  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
226entry:
227  %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
228  %0 = load i32, i32* %first.i, align 4
229  %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0
230  %1 = load i32, i32* %first1.i, align 4
231  call void (...) @foo()
232  %cmp.i = icmp eq i32 %0, %1
233  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
234
235land.rhs.i:
236  %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
237  %2 = load i32, i32* %second.i, align 4
238  %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1
239  %3 = load i32, i32* %second2.i, align 4
240  %cmp2.i = icmp eq i32 %2, %3
241  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit
242
243land.rhs.i.2:
244  %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2
245  %4 = load i32, i32* %third.i, align 4
246  %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 2
247  %5 = load i32, i32* %third2.i, align 4
248  %cmp3.i = icmp eq i32 %4, %5
249  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit
250
251land.rhs.i.3:
252  %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3
253  %6 = load i32, i32* %fourth.i, align 4
254  %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3
255  %7 = load i32, i32* %fourth2.i, align 4
256  %cmp4.i = icmp eq i32 %6, %7
257  br label %opeq1.exit
258
259opeq1.exit:
260  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i] , [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
261  ret i1 %8
262}
263