; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86

; Each function below compares the four i32 fields of two %S values through a
; chain of "load, load, icmp eq, conditional branch" blocks, with an extra call
; to @foo placed in the entry block. The checks pin down when the comparison
; chain is turned into a memcmp call and where the @foo call ends up.

%S = type { i32, i32, i32, i32 }

declare void @foo(...)

; We can split %entry and create a memcmp(16 bytes).
; The call is marked inaccessiblememonly and sits between the loads and the
; compare of the entry block.
define zeroext i1 @opeq1(
; X86-LABEL: @opeq1(
; X86-NEXT:  "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
; X86-NEXT:    call void (...) @foo() #[[ATTR2:[0-9]+]]
; X86-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
; X86-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
; X86:       opeq1.exit:
; X86-NEXT:    ret i1 [[TMP2]]
;
; Make sure this call is moved to the beginning of the entry block.
  %S* nocapture readonly dereferenceable(16) %a,
  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
  %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
  %0 = load i32, i32* %first.i, align 4
  %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0
  %1 = load i32, i32* %first1.i, align 4
  ; Does other work.
  call void (...) @foo() inaccessiblememonly
  %cmp.i = icmp eq i32 %0, %1
  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit

land.rhs.i:
  %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
  %2 = load i32, i32* %second.i, align 4
  %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1
  %3 = load i32, i32* %second2.i, align 4
  %cmp2.i = icmp eq i32 %2, %3
  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit

land.rhs.i.2:
  %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2
  %4 = load i32, i32* %third.i, align 4
  %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 2
  %5 = load i32, i32* %third2.i, align 4
  %cmp3.i = icmp eq i32 %4, %5
  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit

land.rhs.i.3:
  %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3
  %6 = load i32, i32* %fourth.i, align 4
  %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3
  %7 = load i32, i32* %fourth2.i, align 4
  %cmp4.i = icmp eq i32 %6, %7
  br label %opeq1.exit

opeq1.exit:
  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i ], [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
  ret i1 %8
}


; We will not be able to merge anything, make sure the call is not moved out.
; The field indices compared on the %a side (1, 2, 2, 1) do not line up with
; those on the %b side (0, 1, 3, 3).
; NOTE(review): this function references attribute group #0, which has no
; definition in this file, whereas the sibling functions spell out
; `nofree nosync` -- confirm this is intended.
define zeroext i1 @opeq1_discontiguous(
; X86-LABEL: @opeq1_discontiguous(
; X86-NEXT:  entry:
; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 1
; X86-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
; X86-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
; X86-NEXT:    call void (...) @foo() #[[ATTR2]]
; X86-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
; X86-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
; X86:       land.rhs.i:
; X86-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2
; X86-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
; X86-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1
; X86-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
; X86-NEXT:    [[CMP2_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
; X86-NEXT:    br i1 [[CMP2_I]], label [[LAND_RHS_I_2:%.*]], label [[OPEQ1_EXIT]]
; X86:       land.rhs.i.2:
; X86-NEXT:    [[THIRD_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2
; X86-NEXT:    [[TMP4:%.*]] = load i32, i32* [[THIRD_I]], align 4
; X86-NEXT:    [[THIRD2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3
; X86-NEXT:    [[TMP5:%.*]] = load i32, i32* [[THIRD2_I]], align 4
; X86-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
; X86-NEXT:    br i1 [[CMP3_I]], label [[LAND_RHS_I_3:%.*]], label [[OPEQ1_EXIT]]
; X86:       land.rhs.i.3:
; X86-NEXT:    [[FOURTH_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1
; X86-NEXT:    [[TMP6:%.*]] = load i32, i32* [[FOURTH_I]], align 4
; X86-NEXT:    [[FOURTH2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3
; X86-NEXT:    [[TMP7:%.*]] = load i32, i32* [[FOURTH2_I]], align 4
; X86-NEXT:    [[CMP4_I:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]]
; X86-NEXT:    br label [[OPEQ1_EXIT]]
; X86:       opeq1.exit:
; X86-NEXT:    [[TMP8:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ false, [[LAND_RHS_I_2]] ], [ [[CMP4_I]], [[LAND_RHS_I_3]] ]
; X86-NEXT:    ret i1 [[TMP8]]
;
; Nothing is merged here, so make sure this call stays where it is in the
; entry block (after the loads, before the compare).
  %S* nocapture readonly dereferenceable(16) %a,
  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
entry:
  %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
  %0 = load i32, i32* %first.i, align 4
  %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0
  %1 = load i32, i32* %first1.i, align 4
  ; Does other work.
  call void (...) @foo() inaccessiblememonly
  %cmp.i = icmp eq i32 %0, %1
  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit

land.rhs.i:
  %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2
  %2 = load i32, i32* %second.i, align 4
  %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1
  %3 = load i32, i32* %second2.i, align 4
  %cmp2.i = icmp eq i32 %2, %3
  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit

land.rhs.i.2:
  %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2
  %4 = load i32, i32* %third.i, align 4
  %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3
  %5 = load i32, i32* %third2.i, align 4
  %cmp3.i = icmp eq i32 %4, %5
  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit

land.rhs.i.3:
  %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
  %6 = load i32, i32* %fourth.i, align 4
  %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3
  %7 = load i32, i32* %fourth2.i, align 4
  %cmp4.i = icmp eq i32 %6, %7
  br label %opeq1.exit

opeq1.exit:
  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i ], [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
  ret i1 %8
}

; The call happens before the loads, so it cannot clobber them.
define zeroext i1 @opeq1_call_before_loads(
; X86-LABEL: @opeq1_call_before_loads(
; X86-NEXT:  "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
; X86-NEXT:    call void (...) @foo()
; X86-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
; X86-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
; X86:       opeq1.exit:
; X86-NEXT:    ret i1 [[TMP2]]
;
; The call is already the first instruction of the entry block; make sure it
; stays ahead of the merged memcmp(16 bytes).
  %S* nocapture readonly dereferenceable(16) %a,
  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
  call void (...) @foo()
  %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
  %0 = load i32, i32* %first.i, align 4
  %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0
  %1 = load i32, i32* %first1.i, align 4
  %cmp.i = icmp eq i32 %0, %1
  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit

land.rhs.i:
  %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
  %2 = load i32, i32* %second.i, align 4
  %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1
  %3 = load i32, i32* %second2.i, align 4
  %cmp2.i = icmp eq i32 %2, %3
  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit

land.rhs.i.2:
  %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2
  %4 = load i32, i32* %third.i, align 4
  %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 2
  %5 = load i32, i32* %third2.i, align 4
  %cmp3.i = icmp eq i32 %4, %5
  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit

land.rhs.i.3:
  %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3
  %6 = load i32, i32* %fourth.i, align 4
  %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3
  %7 = load i32, i32* %fourth2.i, align 4
  %cmp4.i = icmp eq i32 %6, %7
  br label %opeq1.exit

opeq1.exit:
  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i ], [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
  ret i1 %8
}

; Call happens after the loads, and may clobber them.
define zeroext i1 @opeq1_call_after_loads(
; X86-LABEL: @opeq1_call_after_loads(
; X86-NEXT:  entry:
; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
; X86-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
; X86-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
; X86-NEXT:    call void (...) @foo()
; X86-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
; X86-NEXT:    br i1 [[CMP_I]], label %"land.rhs.i+land.rhs.i.2+land.rhs.i.3", label [[OPEQ1_EXIT:%.*]]
; X86:       "land.rhs.i+land.rhs.i.2+land.rhs.i.3":
; X86-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1
; X86-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1
; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[TMP2]] to i8*
; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[TMP3]] to i8*
; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 12)
; X86-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0
; X86-NEXT:    br label [[OPEQ1_EXIT]]
; X86:       opeq1.exit:
; X86-NEXT:    [[TMP5:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TMP4]], %"land.rhs.i+land.rhs.i.2+land.rhs.i.3" ]
; X86-NEXT:    ret i1 [[TMP5]]
;
; Make sure this call is NOT moved: %entry is left untouched, and only the
; remaining three blocks are merged into a memcmp(12 bytes).
  %S* nocapture readonly dereferenceable(16) %a,
  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
  %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
  %0 = load i32, i32* %first.i, align 4
  %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0
  %1 = load i32, i32* %first1.i, align 4
  call void (...) @foo()
  %cmp.i = icmp eq i32 %0, %1
  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit

land.rhs.i:
  %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
  %2 = load i32, i32* %second.i, align 4
  %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1
  %3 = load i32, i32* %second2.i, align 4
  %cmp2.i = icmp eq i32 %2, %3
  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit

land.rhs.i.2:
  %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2
  %4 = load i32, i32* %third.i, align 4
  %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 2
  %5 = load i32, i32* %third2.i, align 4
  %cmp3.i = icmp eq i32 %4, %5
  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit

land.rhs.i.3:
  %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3
  %6 = load i32, i32* %fourth.i, align 4
  %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3
  %7 = load i32, i32* %fourth2.i, align 4
  %cmp4.i = icmp eq i32 %6, %7
  br label %opeq1.exit

opeq1.exit:
  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i ], [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
  ret i1 %8
}