1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instcombine -S | FileCheck %s
3; RUN: opt < %s -passes=instcombine -S | FileCheck %s
4
; The "n32:64" datalayout component declares 32 and 64 bits as the target's
; native integer widths. NOTE(review): the "legal"/"illegal" wording in the
; test names in this file presumably refers to these widths (i32/i64 vs.
; i3/i37) -- confirm.
5target datalayout = "n32:64"
6
; MainKernel: the input IR below moves float data around as i32. The uitofp
; result is bitcast to i32, stored through i32* views of the two float arrays,
; carried around the loop (.bb3 ... .bb12) in i32 phis, and bitcast back to
; float for every fadd. The CHECK lines expect the output to use float stores,
; float loads and float phis, with no bitcast instructions remaining.
7define void @MainKernel(i32 %iNumSteps, i32 %tid, i32 %base) {
8; CHECK-LABEL: @MainKernel(
9; CHECK-NEXT:    [[CALLA:%.*]] = alloca [258 x float], align 4
10; CHECK-NEXT:    [[CALLB:%.*]] = alloca [258 x float], align 4
11; CHECK-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[INUMSTEPS:%.*]] to float
12; CHECK-NEXT:    [[CONV_I12:%.*]] = zext i32 [[TID:%.*]] to i64
13; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLA]], i64 0, i64 [[CONV_I12]]
14; CHECK-NEXT:    store float [[CONV_I]], float* [[ARRAYIDX3]], align 4
15; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLB]], i64 0, i64 [[CONV_I12]]
16; CHECK-NEXT:    store float [[CONV_I]], float* [[ARRAYIDX6]], align 4
17; CHECK-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[TID]], 0
18; CHECK-NEXT:    br i1 [[CMP7]], label [[DOTBB1:%.*]], label [[DOTBB2:%.*]]
19; CHECK:       .bb1:
20; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLA]], i64 0, i64 256
21; CHECK-NEXT:    store float [[CONV_I]], float* [[ARRAYIDX10]], align 4
22; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLB]], i64 0, i64 256
23; CHECK-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX11]], align 4
24; CHECK-NEXT:    br label [[DOTBB2]]
25; CHECK:       .bb2:
26; CHECK-NEXT:    [[CMP135:%.*]] = icmp sgt i32 [[INUMSTEPS]], 0
27; CHECK-NEXT:    br i1 [[CMP135]], label [[DOTBB3:%.*]], label [[DOTBB8:%.*]]
28; CHECK:       .bb3:
29; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ [[TMP10:%.*]], [[DOTBB12:%.*]] ], [ [[CONV_I]], [[DOTBB2]] ]
30; CHECK-NEXT:    [[TMP2:%.*]] = phi float [ [[TMP11:%.*]], [[DOTBB12]] ], [ [[CONV_I]], [[DOTBB2]] ]
31; CHECK-NEXT:    [[I12_06:%.*]] = phi i32 [ [[SUB:%.*]], [[DOTBB12]] ], [ [[INUMSTEPS]], [[DOTBB2]] ]
32; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[I12_06]], [[BASE:%.*]]
33; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[I12_06]], 1
34; CHECK-NEXT:    [[CONV_I9:%.*]] = sext i32 [[ADD]] to i64
35; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLA]], i64 0, i64 [[CONV_I9]]
36; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds [258 x float], [258 x float]* [[CALLB]], i64 0, i64 [[CONV_I9]]
37; CHECK-NEXT:    [[CMP40:%.*]] = icmp ult i32 [[I12_06]], [[BASE]]
38; CHECK-NEXT:    br i1 [[TMP3]], label [[DOTBB4:%.*]], label [[DOTBB5:%.*]]
39; CHECK:       .bb4:
40; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[ARRAYIDX20]], align 4
41; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[ARRAYIDX24]], align 4
42; CHECK-NEXT:    [[ADD33:%.*]] = fadd float [[TMP5]], [[TMP4]]
43; CHECK-NEXT:    [[ADD33_1:%.*]] = fadd float [[ADD33]], [[TMP1]]
44; CHECK-NEXT:    [[ADD33_2:%.*]] = fadd float [[ADD33_1]], [[TMP2]]
45; CHECK-NEXT:    br label [[DOTBB5]]
46; CHECK:       .bb5:
47; CHECK-NEXT:    [[TMP6:%.*]] = phi float [ [[ADD33_1]], [[DOTBB4]] ], [ [[TMP1]], [[DOTBB3]] ]
48; CHECK-NEXT:    [[TMP7:%.*]] = phi float [ [[ADD33_2]], [[DOTBB4]] ], [ [[TMP2]], [[DOTBB3]] ]
49; CHECK-NEXT:    br i1 [[CMP40]], label [[DOTBB6:%.*]], label [[DOTBB7:%.*]]
50; CHECK:       .bb6:
51; CHECK-NEXT:    store float [[TMP7]], float* [[ARRAYIDX3]], align 4
52; CHECK-NEXT:    store float [[TMP6]], float* [[ARRAYIDX6]], align 4
53; CHECK-NEXT:    br label [[DOTBB7]]
54; CHECK:       .bb7:
55; CHECK-NEXT:    br i1 [[TMP3]], label [[DOTBB9:%.*]], label [[DOTBB10:%.*]]
56; CHECK:       .bb8:
57; CHECK-NEXT:    ret void
58; CHECK:       .bb9:
59; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[ARRAYIDX20]], align 4
60; CHECK-NEXT:    [[TMP9:%.*]] = load float, float* [[ARRAYIDX24]], align 4
61; CHECK-NEXT:    [[ADD33_112:%.*]] = fadd float [[TMP9]], [[TMP8]]
62; CHECK-NEXT:    [[ADD33_1_1:%.*]] = fadd float [[ADD33_112]], [[TMP6]]
63; CHECK-NEXT:    [[ADD33_2_1:%.*]] = fadd float [[ADD33_1_1]], [[TMP7]]
64; CHECK-NEXT:    br label [[DOTBB10]]
65; CHECK:       .bb10:
66; CHECK-NEXT:    [[TMP10]] = phi float [ [[ADD33_1_1]], [[DOTBB9]] ], [ [[TMP6]], [[DOTBB7]] ]
67; CHECK-NEXT:    [[TMP11]] = phi float [ [[ADD33_2_1]], [[DOTBB9]] ], [ [[TMP7]], [[DOTBB7]] ]
68; CHECK-NEXT:    br i1 [[CMP40]], label [[DOTBB11:%.*]], label [[DOTBB12]]
69; CHECK:       .bb11:
70; CHECK-NEXT:    store float [[TMP11]], float* [[ARRAYIDX3]], align 4
71; CHECK-NEXT:    store float [[TMP10]], float* [[ARRAYIDX6]], align 4
72; CHECK-NEXT:    br label [[DOTBB12]]
73; CHECK:       .bb12:
74; CHECK-NEXT:    [[SUB]] = add i32 [[I12_06]], -4
75; CHECK-NEXT:    [[CMP13:%.*]] = icmp sgt i32 [[SUB]], 0
76; CHECK-NEXT:    br i1 [[CMP13]], label [[DOTBB3]], label [[DOTBB8]]
77;
78  %callA = alloca [258 x float], align 4
79  %callB = alloca [258 x float], align 4
80  %conv.i = uitofp i32 %iNumSteps to float
; Integer view of the float value; it feeds the i32 stores below and is the
; initial incoming value of both i32 phis in .bb3.
81  %1 = bitcast float %conv.i to i32
82  %conv.i12 = zext i32 %tid to i64
83  %arrayidx3 = getelementptr inbounds [258 x float], [258 x float]* %callA, i64 0, i64 %conv.i12
84  %2 = bitcast float* %arrayidx3 to i32*
85  store i32 %1, i32* %2, align 4
86  %arrayidx6 = getelementptr inbounds [258 x float], [258 x float]* %callB, i64 0, i64 %conv.i12
87  %3 = bitcast float* %arrayidx6 to i32*
88  store i32 %1, i32* %3, align 4
89  %cmp7 = icmp eq i32 %tid, 0
90  br i1 %cmp7, label %.bb1, label %.bb2
91
92.bb1:
93  %arrayidx10 = getelementptr inbounds [258 x float], [258 x float]* %callA, i64 0, i64 256
94  store float %conv.i, float* %arrayidx10, align 4
95  %arrayidx11 = getelementptr inbounds [258 x float], [258 x float]* %callB, i64 0, i64 256
96  store float 0.000000e+00, float* %arrayidx11, align 4
97  br label %.bb2
98
99.bb2:
100  %cmp135 = icmp sgt i32 %iNumSteps, 0
101  br i1 %cmp135, label %.bb3, label %.bb8
102
103.bb3:
; Loop-carried values, held as i32 in the input (float phis in the CHECK lines).
104  %rA.sroa.8.0 = phi i32 [ %rA.sroa.8.2, %.bb12 ], [ %1, %.bb2 ]
105  %rA.sroa.0.0 = phi i32 [ %rA.sroa.0.2, %.bb12 ], [ %1, %.bb2 ]
106  %i12.06 = phi i32 [ %sub, %.bb12 ], [ %iNumSteps, %.bb2 ]
107  %4 = icmp ugt i32 %i12.06, %base
108  %add = add i32 %i12.06, 1
109  %conv.i9 = sext i32 %add to i64
110  %arrayidx20 = getelementptr inbounds [258 x float], [258 x float]* %callA, i64 0, i64 %conv.i9
111  %5 = bitcast float* %arrayidx20 to i32*
112  %arrayidx24 = getelementptr inbounds [258 x float], [258 x float]* %callB, i64 0, i64 %conv.i9
113  %6 = bitcast float* %arrayidx24 to i32*
114  %cmp40 = icmp ult i32 %i12.06, %base
115  br i1 %4, label %.bb4, label %.bb5
116
117.bb4:
; i32 loads immediately bitcast to float for the fadd chain; each fadd result
; that leaves the block is bitcast back to i32 for the phis in .bb5.
118  %7 = load i32, i32* %5, align 4
119  %8 = load i32, i32* %6, align 4
120  %9 = bitcast i32 %8 to float
121  %10 = bitcast i32 %7 to float
122  %add33 = fadd float %9, %10
123  %11 = bitcast i32 %rA.sroa.8.0 to float
124  %add33.1 = fadd float %add33, %11
125  %12 = bitcast float %add33.1 to i32
126  %13 = bitcast i32 %rA.sroa.0.0 to float
127  %add33.2 = fadd float %add33.1, %13
128  %14 = bitcast float %add33.2 to i32
129  br label %.bb5
130
131.bb5:
132  %rA.sroa.8.1 = phi i32 [ %12, %.bb4 ], [ %rA.sroa.8.0, %.bb3 ]
133  %rA.sroa.0.1 = phi i32 [ %14, %.bb4 ], [ %rA.sroa.0.0, %.bb3 ]
134  br i1 %cmp40, label %.bb6, label %.bb7
135
136.bb6:
137  store i32 %rA.sroa.0.1, i32* %2, align 4
138  store i32 %rA.sroa.8.1, i32* %3, align 4
139  br label %.bb7
140
141.bb7:
142  br i1 %4, label %.bb9, label %.bb10
143
144.bb8:
145  ret void
146
147.bb9:
; Same load / bitcast / fadd / bitcast pattern as .bb4, applied to the values
; coming out of .bb5.
148  %15 = load i32, i32* %5, align 4
149  %16 = load i32, i32* %6, align 4
150  %17 = bitcast i32 %16 to float
151  %18 = bitcast i32 %15 to float
152  %add33.112 = fadd float %17, %18
153  %19 = bitcast i32 %rA.sroa.8.1 to float
154  %add33.1.1 = fadd float %add33.112, %19
155  %20 = bitcast float %add33.1.1 to i32
156  %21 = bitcast i32 %rA.sroa.0.1 to float
157  %add33.2.1 = fadd float %add33.1.1, %21
158  %22 = bitcast float %add33.2.1 to i32
159  br label %.bb10
160
161.bb10:
162  %rA.sroa.8.2 = phi i32 [ %20, %.bb9 ], [ %rA.sroa.8.1, %.bb7 ]
163  %rA.sroa.0.2 = phi i32 [ %22, %.bb9 ], [ %rA.sroa.0.1, %.bb7 ]
164  br i1 %cmp40, label %.bb11, label %.bb12
165
166.bb11:
167  store i32 %rA.sroa.0.2, i32* %2, align 4
168  store i32 %rA.sroa.8.2, i32* %3, align 4
169  br label %.bb12
170
171.bb12:
; Loop latch: the counter steps down by 4 and the loop repeats while it is
; still positive (signed compare).
172  %sub = add i32 %i12.06, -4
173  %cmp13 = icmp sgt i32 %sub, 0
174  br i1 %cmp13, label %.bb3, label %.bb8
175}
176
; External functions with no body in this file. @get_i32 and @get_i3 supply
; the non-constant phi operand in the zext_from_* tests; @bar is the call made
; on the other path of those tests.
177declare i32 @get_i32()
178declare i3 @get_i3()
179declare void @bar()
180
; zext i32 -> i37 applied to a phi that merges a call result with the
; constant 3. The CHECK lines expect the IR to be left as written: the phi
; stays i32 and the zext stays in %exit.
; NOTE(review): presumably the cast is not moved because i37 is not one of the
; native widths (32, 64) named in the datalayout -- confirm against instcombine.
181define i37 @zext_from_legal_to_illegal_type(i32 %x) {
182; CHECK-LABEL: @zext_from_legal_to_illegal_type(
183; CHECK-NEXT:  entry:
184; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 42
185; CHECK-NEXT:    br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
186; CHECK:       t:
187; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
188; CHECK-NEXT:    br label [[EXIT:%.*]]
189; CHECK:       f:
190; CHECK-NEXT:    call void @bar()
191; CHECK-NEXT:    br label [[EXIT]]
192; CHECK:       exit:
193; CHECK-NEXT:    [[P:%.*]] = phi i32 [ [[Y]], [[T]] ], [ 3, [[F]] ]
194; CHECK-NEXT:    [[R:%.*]] = zext i32 [[P]] to i37
195; CHECK-NEXT:    ret i37 [[R]]
196;
197entry:
198  %cmp = icmp eq i32 %x, 42
199  br i1 %cmp, label %t, label %f
200
201t:
202  %y = call i32 @get_i32()
203  br label %exit
204
205f:
206  call void @bar()
207  br label %exit
208
209exit:
210  %p = phi i32 [ %y, %t ], [ 3, %f ]
211  %r = zext i32 %p to i37
212  ret i37 %r
213}
214
; zext i3 -> i37 applied to a phi that merges a call result with the
; constant 3. The CHECK lines expect the IR to be left as written: the phi
; stays i3 and the zext stays in %exit.
; NOTE(review): presumably the cast is not moved because i37 is not one of the
; native widths (32, 64) named in the datalayout -- confirm against instcombine.
215define i37 @zext_from_illegal_to_illegal_type(i32 %x) {
216; CHECK-LABEL: @zext_from_illegal_to_illegal_type(
217; CHECK-NEXT:  entry:
218; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 42
219; CHECK-NEXT:    br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
220; CHECK:       t:
221; CHECK-NEXT:    [[Y:%.*]] = call i3 @get_i3()
222; CHECK-NEXT:    br label [[EXIT:%.*]]
223; CHECK:       f:
224; CHECK-NEXT:    call void @bar()
225; CHECK-NEXT:    br label [[EXIT]]
226; CHECK:       exit:
227; CHECK-NEXT:    [[P:%.*]] = phi i3 [ [[Y]], [[T]] ], [ 3, [[F]] ]
228; CHECK-NEXT:    [[R:%.*]] = zext i3 [[P]] to i37
229; CHECK-NEXT:    ret i37 [[R]]
230;
231entry:
232  %cmp = icmp eq i32 %x, 42
233  br i1 %cmp, label %t, label %f
234
235t:
236  %y = call i3 @get_i3()
237  br label %exit
238
239f:
240  call void @bar()
241  br label %exit
242
243exit:
244  %p = phi i3 [ %y, %t ], [ 3, %f ]
245  %r = zext i3 %p to i37
246  ret i37 %r
247}
248
; zext i32 -> i64 applied to a phi that merges a call result with the
; constant 3. The CHECK lines expect the cast to be moved out of %exit: the
; call result is zero-extended in %t (PHI_CAST), the phi becomes an i64 phi
; whose constant operand is 3, and %exit returns the phi directly.
249define i64 @zext_from_legal_to_legal_type(i32 %x) {
250; CHECK-LABEL: @zext_from_legal_to_legal_type(
251; CHECK-NEXT:  entry:
252; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 42
253; CHECK-NEXT:    br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
254; CHECK:       t:
255; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
256; CHECK-NEXT:    [[PHI_CAST:%.*]] = zext i32 [[Y]] to i64
257; CHECK-NEXT:    br label [[EXIT:%.*]]
258; CHECK:       f:
259; CHECK-NEXT:    call void @bar()
260; CHECK-NEXT:    br label [[EXIT]]
261; CHECK:       exit:
262; CHECK-NEXT:    [[P:%.*]] = phi i64 [ [[PHI_CAST]], [[T]] ], [ 3, [[F]] ]
263; CHECK-NEXT:    ret i64 [[P]]
264;
265entry:
266  %cmp = icmp eq i32 %x, 42
267  br i1 %cmp, label %t, label %f
268
269t:
270  %y = call i32 @get_i32()
271  br label %exit
272
273f:
274  call void @bar()
275  br label %exit
276
277exit:
278  %p = phi i32 [ %y, %t ], [ 3, %f ]
279  %r = zext i32 %p to i64
280  ret i64 %r
281}
282
; zext i3 -> i64 applied to a phi that merges a call result with the
; constant 3. The CHECK lines expect the cast to be moved out of %exit: the
; i3 call result is zero-extended in %t (PHI_CAST), the phi becomes an i64 phi
; whose constant operand is 3, and %exit returns the phi directly.
283define i64 @zext_from_illegal_to_legal_type(i32 %x) {
284; CHECK-LABEL: @zext_from_illegal_to_legal_type(
285; CHECK-NEXT:  entry:
286; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 42
287; CHECK-NEXT:    br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
288; CHECK:       t:
289; CHECK-NEXT:    [[Y:%.*]] = call i3 @get_i3()
290; CHECK-NEXT:    [[PHI_CAST:%.*]] = zext i3 [[Y]] to i64
291; CHECK-NEXT:    br label [[EXIT:%.*]]
292; CHECK:       f:
293; CHECK-NEXT:    call void @bar()
294; CHECK-NEXT:    br label [[EXIT]]
295; CHECK:       exit:
296; CHECK-NEXT:    [[P:%.*]] = phi i64 [ [[PHI_CAST]], [[T]] ], [ 3, [[F]] ]
297; CHECK-NEXT:    ret i64 [[P]]
298;
299entry:
300  %cmp = icmp eq i32 %x, 42
301  br i1 %cmp, label %t, label %f
302
303t:
304  %y = call i3 @get_i3()
305  br label %exit
306
307f:
308  call void @bar()
309  br label %exit
310
311exit:
312  %p = phi i3 [ %y, %t ], [ 3, %f ]
313  %r = zext i3 %p to i64
314  ret i64 %r
315}
316
; %phi is a loop-header phi (1 from %entry, %iv.next from the latch) whose
; only use is a trunc to i8 in %exit. The CHECK lines expect the IR to be left
; as written: the phi stays i32 and the trunc stays in %exit.
317define i8 @trunc_in_loop_exit_block() {
318; CHECK-LABEL: @trunc_in_loop_exit_block(
319; CHECK-NEXT:  entry:
320; CHECK-NEXT:    br label [[LOOP:%.*]]
321; CHECK:       loop:
322; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
323; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[IV_NEXT]], [[LOOP_LATCH]] ]
324; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IV]], 100
325; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
326; CHECK:       loop.latch:
327; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
328; CHECK-NEXT:    br label [[LOOP]]
329; CHECK:       exit:
330; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[PHI]] to i8
331; CHECK-NEXT:    ret i8 [[TRUNC]]
332;
333entry:
334  br label %loop
335
336loop:
337  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
338  %phi = phi i32 [ 1, %entry ], [ %iv.next, %loop.latch ]
339  %cmp = icmp ult i32 %iv, 100
340  br i1 %cmp, label %loop.latch, label %exit
341
342loop.latch:
343  %iv.next = add i32 %iv, 1
344  br label %loop
345
346exit:
347  %trunc = trunc i32 %phi to i8
348  ret i8 %trunc
349}
350
; %iv is an i8 loop phi that is zero-extended twice: in %loop for the compare
; against %end, and again in %exit for the return value. The increment is
; computed in i32 (zext of %step, add, trunc back to i8).
; The CHECK lines expect the increment to become a single `add i8`, the
; `icmp ne` to become `icmp eq` with the branch targets swapped, the phi to
; stay i8, and both zexts of %iv to remain (one in %loop, one in %exit).
351define i32 @zext_in_loop_and_exit_block(i8 %step, i32 %end) {
352; CHECK-LABEL: @zext_in_loop_and_exit_block(
353; CHECK-NEXT:  entry:
354; CHECK-NEXT:    br label [[LOOP:%.*]]
355; CHECK:       loop:
356; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
357; CHECK-NEXT:    [[IV_EXT:%.*]] = zext i8 [[IV]] to i32
358; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[IV_EXT]], [[END:%.*]]
359; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
360; CHECK:       loop.latch:
361; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], [[STEP:%.*]]
362; CHECK-NEXT:    br label [[LOOP]]
363; CHECK:       exit:
364; CHECK-NEXT:    [[EXT:%.*]] = zext i8 [[IV]] to i32
365; CHECK-NEXT:    ret i32 [[EXT]]
366;
367entry:
368  br label %loop
369
370loop:
371  %iv = phi i8 [ 0, %entry ], [ %iv.next.trunc, %loop.latch ]
372  %iv.ext = zext i8 %iv to i32
373  %cmp = icmp ne i32 %iv.ext, %end
374  br i1 %cmp, label %loop.latch, label %exit
375
376loop.latch:
377  %step.ext = zext i8 %step to i32
378  %iv.next = add i32 %iv.ext, %step.ext
379  %iv.next.trunc = trunc i32 %iv.next to i8
380  br label %loop
381
382exit:
383  %ext = zext i8 %iv to i32
384  ret i32 %ext
385}
386