; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; Every function below combines phi nodes with casts and records, in its
; assertion comments, the IR expected after the instcombine pass has run.

; The "n" component declares 32 and 64 bits as the native integer widths.
; The zext_from_* tests name any other width (i3, i37) "illegal".
target datalayout = "n32:64"

; The input carries float values as i32 through the loop phis, with bitcasts
; on the way in and out (including the load/store pointers). The expected
; output has float phis, float loads and float stores, and no bitcasts.
define void @MainKernel(i32 %iNumSteps, i32 %tid, i32 %base) {
; CHECK-LABEL: @MainKernel(
; CHECK-NEXT:    [[CALLA:%.*]] = alloca [258 x float], align 4
; CHECK-NEXT:    [[CALLB:%.*]] = alloca [258 x float], align 4
; CHECK-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[INUMSTEPS:%.*]] to float
; CHECK-NEXT:    [[CONV_I12:%.*]] = zext i32 [[TID:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLA]], i64 0, i64 [[CONV_I12]]
; CHECK-NEXT:    store float [[CONV_I]], float* [[ARRAYIDX3]], align 4
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLB]], i64 0, i64 [[CONV_I12]]
; CHECK-NEXT:    store float [[CONV_I]], float* [[ARRAYIDX6]], align 4
; CHECK-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[TID]], 0
; CHECK-NEXT:    br i1 [[CMP7]], label [[DOTBB1:%.*]], label [[DOTBB2:%.*]]
; CHECK:       .bb1:
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLA]], i64 0, i64 256
; CHECK-NEXT:    store float [[CONV_I]], float* [[ARRAYIDX10]], align 4
; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLB]], i64 0, i64 256
; CHECK-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX11]], align 4
; CHECK-NEXT:    br label [[DOTBB2]]
; CHECK:       .bb2:
; CHECK-NEXT:    [[CMP135:%.*]] = icmp sgt i32 [[INUMSTEPS]], 0
; CHECK-NEXT:    br i1 [[CMP135]], label [[DOTBB3:%.*]], label [[DOTBB8:%.*]]
; CHECK:       .bb3:
; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ [[TMP10:%.*]], [[DOTBB12:%.*]] ], [ [[CONV_I]], [[DOTBB2]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = phi float [ [[TMP11:%.*]], [[DOTBB12]] ], [ [[CONV_I]], [[DOTBB2]] ]
; CHECK-NEXT:    [[I12_06:%.*]] = phi i32 [ [[SUB:%.*]], [[DOTBB12]] ], [ [[INUMSTEPS]], [[DOTBB2]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[I12_06]], [[BASE:%.*]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[I12_06]], 1
; CHECK-NEXT:    [[CONV_I9:%.*]] = sext i32 [[ADD]] to i64
; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLA]], i64 0, i64 [[CONV_I9]]
; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLB]], i64 0, i64 [[CONV_I9]]
; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[I12_06]], [[BASE]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[DOTBB4:%.*]], label [[DOTBB5:%.*]]
; CHECK:       .bb4:
; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[ARRAYIDX20]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[ARRAYIDX24]], align 4
; CHECK-NEXT:    [[ADD33:%.*]] = fadd float [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[ADD33_1:%.*]] = fadd float [[ADD33]], [[TMP1]]
; CHECK-NEXT:    [[ADD33_2:%.*]] = fadd float [[ADD33_1]], [[TMP2]]
; CHECK-NEXT:    br label [[DOTBB5]]
; CHECK:       .bb5:
; CHECK-NEXT:    [[TMP6:%.*]] = phi float [ [[ADD33_1]], [[DOTBB4]] ], [ [[TMP1]], [[DOTBB3]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = phi float [ [[ADD33_2]], [[DOTBB4]] ], [ [[TMP2]], [[DOTBB3]] ]
; CHECK-NEXT:    br i1 [[CMP40]], label [[DOTBB6:%.*]], label [[DOTBB7:%.*]]
; CHECK:       .bb6:
; CHECK-NEXT:    store float [[TMP7]], float* [[ARRAYIDX3]], align 4
; CHECK-NEXT:    store float [[TMP6]], float* [[ARRAYIDX6]], align 4
; CHECK-NEXT:    br label [[DOTBB7]]
; CHECK:       .bb7:
; CHECK-NEXT:    br i1 [[TMP3]], label [[DOTBB9:%.*]], label [[DOTBB10:%.*]]
; CHECK:       .bb8:
; CHECK-NEXT:    ret void
; CHECK:       .bb9:
; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[ARRAYIDX20]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = load float, float* [[ARRAYIDX24]], align 4
; CHECK-NEXT:    [[ADD33_112:%.*]] = fadd float [[TMP9]], [[TMP8]]
; CHECK-NEXT:    [[ADD33_1_1:%.*]] = fadd float [[ADD33_112]], [[TMP6]]
; CHECK-NEXT:    [[ADD33_2_1:%.*]] = fadd float [[ADD33_1_1]], [[TMP7]]
; CHECK-NEXT:    br label [[DOTBB10]]
; CHECK:       .bb10:
; CHECK-NEXT:    [[TMP10]] = phi float [ [[ADD33_1_1]], [[DOTBB9]] ], [ [[TMP6]], [[DOTBB7]] ]
; CHECK-NEXT:    [[TMP11]] = phi float [ [[ADD33_2_1]], [[DOTBB9]] ], [ [[TMP7]], [[DOTBB7]] ]
; CHECK-NEXT:    br i1 [[CMP40]], label [[DOTBB11:%.*]], label [[DOTBB12]]
; CHECK:       .bb11:
; CHECK-NEXT:    store float [[TMP11]], float* [[ARRAYIDX3]], align 4
; CHECK-NEXT:    store float [[TMP10]], float* [[ARRAYIDX6]], align 4
; CHECK-NEXT:    br label [[DOTBB12]]
; CHECK:       .bb12:
; CHECK-NEXT:    [[SUB]] = add i32 [[I12_06]], -4
; CHECK-NEXT:    [[CMP13:%.*]] = icmp sgt i32 [[SUB]], 0
; CHECK-NEXT:    br i1 [[CMP13]], label [[DOTBB3]], label [[DOTBB8]]
;
  %callA = alloca [258 x float], align 4
  %callB = alloca [258 x float], align 4
  %conv.i = uitofp i32 %iNumSteps to float
  %1 = bitcast float %conv.i to i32
  %conv.i12 = zext i32 %tid to i64
  %arrayidx3 = getelementptr inbounds [258 x float], [258 x float]* %callA, i64 0, i64 %conv.i12
  %2 = bitcast float* %arrayidx3 to i32*
  store i32 %1, i32* %2, align 4
  %arrayidx6 = getelementptr inbounds [258 x float], [258 x float]* %callB, i64 0, i64 %conv.i12
  %3 = bitcast float* %arrayidx6 to i32*
  store i32 %1, i32* %3, align 4
  %cmp7 = icmp eq i32 %tid, 0
  br i1 %cmp7, label %.bb1, label %.bb2

.bb1:
  %arrayidx10 = getelementptr inbounds [258 x float], [258 x float]* %callA, i64 0, i64 256
  store float %conv.i, float* %arrayidx10, align 4
  %arrayidx11 = getelementptr inbounds [258 x float], [258 x float]* %callB, i64 0, i64 256
  store float 0.000000e+00, float* %arrayidx11, align 4
  br label %.bb2

.bb2:
  %cmp135 = icmp sgt i32 %iNumSteps, 0
  br i1 %cmp135, label %.bb3, label %.bb8

.bb3:
  %rA.sroa.8.0 = phi i32 [ %rA.sroa.8.2, %.bb12 ], [ %1, %.bb2 ]
  %rA.sroa.0.0 = phi i32 [ %rA.sroa.0.2, %.bb12 ], [ %1, %.bb2 ]
  %i12.06 = phi i32 [ %sub, %.bb12 ], [ %iNumSteps, %.bb2 ]
  %4 = icmp ugt i32 %i12.06, %base
  %add = add i32 %i12.06, 1
  %conv.i9 = sext i32 %add to i64
  %arrayidx20 = getelementptr inbounds [258 x float], [258 x float]* %callA, i64 0, i64 %conv.i9
  %5 = bitcast float* %arrayidx20 to i32*
  %arrayidx24 = getelementptr inbounds [258 x float], [258 x float]* %callB, i64 0, i64 %conv.i9
  %6 = bitcast float* %arrayidx24 to i32*
  %cmp40 = icmp ult i32 %i12.06, %base
  br i1 %4, label %.bb4, label %.bb5

.bb4:
  %7 = load i32, i32* %5, align 4
  %8 = load i32, i32* %6, align 4
  %9 = bitcast i32 %8 to float
  %10 = bitcast i32 %7 to float
  %add33 = fadd float %9, %10
  %11 = bitcast i32 %rA.sroa.8.0 to float
  %add33.1 = fadd float %add33, %11
  %12 = bitcast float %add33.1 to i32
  %13 = bitcast i32 %rA.sroa.0.0 to float
  %add33.2 = fadd float %add33.1, %13
  %14 = bitcast float %add33.2 to i32
  br label %.bb5

.bb5:
  %rA.sroa.8.1 = phi i32 [ %12, %.bb4 ], [ %rA.sroa.8.0, %.bb3 ]
  %rA.sroa.0.1 = phi i32 [ %14, %.bb4 ], [ %rA.sroa.0.0, %.bb3 ]
  br i1 %cmp40, label %.bb6, label %.bb7

.bb6:
  store i32 %rA.sroa.0.1, i32* %2, align 4
  store i32 %rA.sroa.8.1, i32* %3, align 4
  br label %.bb7

.bb7:
  br i1 %4, label %.bb9, label %.bb10

.bb8:
  ret void

.bb9:
  %15 = load i32, i32* %5, align 4
  %16 = load i32, i32* %6, align 4
  %17 = bitcast i32 %16 to float
  %18 = bitcast i32 %15 to float
  %add33.112 = fadd float %17, %18
  %19 = bitcast i32 %rA.sroa.8.1 to float
  %add33.1.1 = fadd float %add33.112, %19
  %20 = bitcast float %add33.1.1 to i32
  %21 = bitcast i32 %rA.sroa.0.1 to float
  %add33.2.1 = fadd float %add33.1.1, %21
  %22 = bitcast float %add33.2.1 to i32
  br label %.bb10

.bb10:
  %rA.sroa.8.2 = phi i32 [ %20, %.bb9 ], [ %rA.sroa.8.1, %.bb7 ]
  %rA.sroa.0.2 = phi i32 [ %22, %.bb9 ], [ %rA.sroa.0.1, %.bb7 ]
  br i1 %cmp40, label %.bb11, label %.bb12

.bb11:
  store i32 %rA.sroa.0.2, i32* %2, align 4
  store i32 %rA.sroa.8.2, i32* %3, align 4
  br label %.bb12

.bb12:
  %sub = add i32 %i12.06, -4
  %cmp13 = icmp sgt i32 %sub, 0
  br i1 %cmp13, label %.bb3, label %.bb8
}

; External helpers called by the zext_from_* tests below.
declare i32 @get_i32()
declare i3 @get_i3()
declare void @bar()

; zext of a phi from i32 to i37 (i37 is not a native width).
; The expected output keeps the phi at i32 and the zext after it.
define i37 @zext_from_legal_to_illegal_type(i32 %x) {
; CHECK-LABEL: @zext_from_legal_to_illegal_type(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
; CHECK:       t:
; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
; CHECK-NEXT:    br label [[EXIT:%.*]]
; CHECK:       f:
; CHECK-NEXT:    call void @bar()
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    [[P:%.*]] = phi i32 [ [[Y]], [[T]] ], [ 3, [[F]] ]
; CHECK-NEXT:    [[R:%.*]] = zext i32 [[P]] to i37
; CHECK-NEXT:    ret i37 [[R]]
;
entry:
  %cmp = icmp eq i32 %x, 42
  br i1 %cmp, label %t, label %f

t:
  %y = call i32 @get_i32()
  br label %exit

f:
  call void @bar()
  br label %exit

exit:
  %p = phi i32 [ %y, %t ], [ 3, %f ]
  %r = zext i32 %p to i37
  ret i37 %r
}

; zext of a phi from i3 to i37 (neither is a native width).
; The expected output keeps the phi at i3 and the zext after it.
define i37 @zext_from_illegal_to_illegal_type(i32 %x) {
; CHECK-LABEL: @zext_from_illegal_to_illegal_type(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
; CHECK:       t:
; CHECK-NEXT:    [[Y:%.*]] = call i3 @get_i3()
; CHECK-NEXT:    br label [[EXIT:%.*]]
; CHECK:       f:
; CHECK-NEXT:    call void @bar()
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    [[P:%.*]] = phi i3 [ [[Y]], [[T]] ], [ 3, [[F]] ]
; CHECK-NEXT:    [[R:%.*]] = zext i3 [[P]] to i37
; CHECK-NEXT:    ret i37 [[R]]
;
entry:
  %cmp = icmp eq i32 %x, 42
  br i1 %cmp, label %t, label %f

t:
  %y = call i3 @get_i3()
  br label %exit

f:
  call void @bar()
  br label %exit

exit:
  %p = phi i3 [ %y, %t ], [ 3, %f ]
  %r = zext i3 %p to i64
  ret i64 %r
}

; zext of a phi from i32 to i64 (both native widths).
; The expected output performs the zext in block %t and widens the phi to i64.
define i64 @zext_from_legal_to_legal_type(i32 %x) {
; CHECK-LABEL: @zext_from_legal_to_legal_type(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
; CHECK:       t:
; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
; CHECK-NEXT:    [[PHI_CAST:%.*]] = zext i32 [[Y]] to i64
; CHECK-NEXT:    br label [[EXIT:%.*]]
; CHECK:       f:
; CHECK-NEXT:    call void @bar()
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    [[P:%.*]] = phi i64 [ [[PHI_CAST]], [[T]] ], [ 3, [[F]] ]
; CHECK-NEXT:    ret i64 [[P]]
;
entry:
  %cmp = icmp eq i32 %x, 42
  br i1 %cmp, label %t, label %f

t:
  %y = call i32 @get_i32()
  br label %exit

f:
  call void @bar()
  br label %exit

exit:
  %p = phi i32 [ %y, %t ], [ 3, %f ]
  %r = zext i32 %p to i64
  ret i64 %r
}

; zext of a phi from i3 to i64 (only the destination is a native width).
; The expected output performs the zext in block %t and widens the phi to i64.
define i64 @zext_from_illegal_to_legal_type(i32 %x) {
; CHECK-LABEL: @zext_from_illegal_to_legal_type(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
; CHECK:       t:
; CHECK-NEXT:    [[Y:%.*]] = call i3 @get_i3()
; CHECK-NEXT:    [[PHI_CAST:%.*]] = zext i3 [[Y]] to i64
; CHECK-NEXT:    br label [[EXIT:%.*]]
; CHECK:       f:
; CHECK-NEXT:    call void @bar()
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    [[P:%.*]] = phi i64 [ [[PHI_CAST]], [[T]] ], [ 3, [[F]] ]
; CHECK-NEXT:    ret i64 [[P]]
;
entry:
  %cmp = icmp eq i32 %x, 42
  br i1 %cmp, label %t, label %f

t:
  %y = call i3 @get_i3()
  br label %exit

f:
  call void @bar()
  br label %exit

exit:
  %p = phi i3 [ %y, %t ], [ 3, %f ]
  %r = zext i3 %p to i64
  ret i64 %r
}

; trunc of a loop-header phi, placed in the loop exit block.
; The expected output leaves the phi at i32 and the trunc in the exit block.
define i8 @trunc_in_loop_exit_block() {
; CHECK-LABEL: @trunc_in_loop_exit_block(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[IV_NEXT]], [[LOOP_LATCH]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IV]], 100
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT:    br label [[LOOP]]
; CHECK:       exit:
; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[PHI]] to i8
; CHECK-NEXT:    ret i8 [[TRUNC]]
;
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %phi = phi i32 [ 1, %entry ], [ %iv.next, %loop.latch ]
  %cmp = icmp ult i32 %iv, 100
  br i1 %cmp, label %loop.latch, label %exit

loop.latch:
  %iv.next = add i32 %iv, 1
  br label %loop

exit:
  %trunc = trunc i32 %phi to i8
  ret i8 %trunc
}

; An i8 induction phi that is zero-extended both inside the loop and in the
; exit block, with the increment computed in i32 and truncated back to i8.
; The expected output performs the increment as an i8 add and keeps both
; zexts of the phi; the loop condition appears as an equality test with the
; branch targets swapped.
define i32 @zext_in_loop_and_exit_block(i8 %step, i32 %end) {
; CHECK-LABEL: @zext_in_loop_and_exit_block(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[IV_EXT:%.*]] = zext i8 [[IV]] to i32
; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[IV_EXT]], [[END:%.*]]
; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], [[STEP:%.*]]
; CHECK-NEXT:    br label [[LOOP]]
; CHECK:       exit:
; CHECK-NEXT:    [[EXT:%.*]] = zext i8 [[IV]] to i32
; CHECK-NEXT:    ret i32 [[EXT]]
;
entry:
  br label %loop

loop:
  %iv = phi i8 [ 0, %entry ], [ %iv.next.trunc, %loop.latch ]
  %iv.ext = zext i8 %iv to i32
  %cmp = icmp ne i32 %iv.ext, %end
  br i1 %cmp, label %loop.latch, label %exit

loop.latch:
  %step.ext = zext i8 %step to i32
  %iv.next = add i32 %iv.ext, %step.ext
  %iv.next.trunc = trunc i32 %iv.next to i8
  br label %loop

exit:
  %ext = zext i8 %iv to i32
  ret i32 %ext
}