1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instsimplify -S | FileCheck %s
3
4; If any bits of the shift amount are known to make it exceed or equal
5; the number of bits in the type, the shift result is poison.
6
; The `or` with 32 forces bit 5 of the shift amount on, so the amount is
; always >= 32, the bit width of i32. The shl is expected to fold to poison.
7define i32 @shl_amount_is_known_bogus(i32 %a, i32 %b) {
8; CHECK-LABEL: @shl_amount_is_known_bogus(
9; CHECK-NEXT:    ret i32 poison
10;
11  %or = or i32 %b, 32
12  %shl = shl i32 %a, %or
13  ret i32 %shl
14}
15
16; Check some weird types and the other shift ops.
17
; The `or` with 31 (0b11111) sets the five low bits of the shift amount, so
; the amount is always >= 31, the bit width of i31. The lshr is expected to
; fold to poison.
18define i31 @lshr_amount_is_known_bogus(i31 %a, i31 %b) {
19; CHECK-LABEL: @lshr_amount_is_known_bogus(
20; CHECK-NEXT:    ret i31 poison
21;
22  %or = or i31 %b, 31
23  %shr = lshr i31 %a, %or
24  ret i31 %shr
25}
26
; The `or` with 33 (0b100001) sets bits 0 and 5 of the shift amount, so the
; amount is always >= 33, the bit width of i33. The ashr is expected to fold
; to poison.
27define i33 @ashr_amount_is_known_bogus(i33 %a, i33 %b) {
28; CHECK-LABEL: @ashr_amount_is_known_bogus(
29; CHECK-NEXT:    ret i33 poison
30;
31  %or = or i33 %b, 33
32  %shr = ashr i33 %a, %or
33  ret i33 %shr
34}
35
36
37; If all valid bits of the shift amount are known 0, there's no shift.
38; It doesn't matter if high bits are set because that would make the shift
; amount out of range and the result poison.
39; Therefore, the only possible valid result of these shifts is %a.
40
; In-range shift amounts for i16 are 0..15 and fit in bits 0..3. The mask
; 0xfff0 clears exactly those bits, so the amount is either 0 or out of range.
; The ashr is expected to fold to %a.
41define i16 @ashr_amount_is_zero(i16 %a, i16 %b) {
42; CHECK-LABEL: @ashr_amount_is_zero(
43; CHECK-NEXT:    ret i16 [[A:%.*]]
44;
45  %and = and i16 %b, 65520 ; 0xfff0
46  %shr = ashr i16 %a, %and
47  ret i16 %shr
48}
49
; In-range shift amounts for i300 are 0..299 and fit in bits 0..8. The mask
; 2048 keeps only bit 11, so the amount is either 0 or 2048 (out of range).
; The lshr is expected to fold to %a.
50define i300 @lshr_amount_is_zero(i300 %a, i300 %b) {
51; CHECK-LABEL: @lshr_amount_is_zero(
52; CHECK-NEXT:    ret i300 [[A:%.*]]
53;
54  %and = and i300 %b, 2048
55  %shr = lshr i300 %a, %and
56  ret i300 %shr
57}
58
; In-range shift amounts for i9 are 0..8 and fit in bits 0..3. The mask 0x1f0
; clears exactly those bits, so the amount is either 0 or >= 16 (out of
; range). The shl is expected to fold to %a.
59define i9 @shl_amount_is_zero(i9 %a, i9 %b) {
60; CHECK-LABEL: @shl_amount_is_zero(
61; CHECK-NEXT:    ret i9 [[A:%.*]]
62;
63  %and = and i9 %b, 496 ; 0x1f0
64  %shl = shl i9 %a, %and
65  ret i9 %shl
66}
67
68
69; Verify that we've calculated the log2 boundary of valid bits correctly for a weird type.
70
; Negative test: the mask 0x1f8 clears only bits 0..2, so bit 3 may still be
; set and the shift amount may be 8, which is in range for i9. The shl must
; not be folded away. The expected output prints the mask as -8, which is 504
; reinterpreted as a signed i9 (504 - 512).
71define i9 @shl_amount_is_not_known_zero(i9 %a, i9 %b) {
72; CHECK-LABEL: @shl_amount_is_not_known_zero(
73; CHECK-NEXT:    [[AND:%.*]] = and i9 [[B:%.*]], -8
74; CHECK-NEXT:    [[SHL:%.*]] = shl i9 [[A:%.*]], [[AND]]
75; CHECK-NEXT:    ret i9 [[SHL]]
76;
77  %and = and i9 %b, 504 ; 0x1f8
78  %shl = shl i9 %a, %and
79  ret i9 %shl
80}
81
82
83; For vectors, we need all scalar elements to meet the requirements to optimize.
84
; Splat case: both lanes of the shift amount have bit 5 forced on, so each
; lane is >= 32, the element bit width. The whole ashr is expected to fold to
; poison.
85define <2 x i32> @ashr_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
86; CHECK-LABEL: @ashr_vector_bogus(
87; CHECK-NEXT:    ret <2 x i32> poison
88;
89  %or = or <2 x i32> %b, <i32 32, i32 32>
90  %shr = ashr <2 x i32> %a, %or
91  ret <2 x i32> %shr
92}
93
94; FIXME: This is poison, but computeKnownBits doesn't handle the union.
; Non-splat case: lane 0 has bit 5 forced on (>= 32) and lane 1 has bit 6
; forced on (>= 64), so every lane is out of range for i32. The two constants
; share no set bit, and the expected output below records that the shl is
; currently left unsimplified.
95define <2 x i32> @shl_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
96; CHECK-LABEL: @shl_vector_bogus(
97; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[B:%.*]], <i32 32, i32 64>
98; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[OR]]
99; CHECK-NEXT:    ret <2 x i32> [[SHL]]
100;
101  %or = or <2 x i32> %b, <i32 32, i32 64>
102  %shl = shl <2 x i32> %a, %or
103  ret <2 x i32> %shl
104}
105
; In-range shift amounts for i32 are 0..31 and fit in bits 0..4. The
; per-lane masks keep only bit 6 (64) and bit 8 (256), so each lane of the
; amount is either 0 or out of range. The lshr is expected to fold to %a even
; though the mask is not a splat.
106define <2 x i32> @lshr_vector_zero(<2 x i32> %a, <2 x i32> %b) {
107; CHECK-LABEL: @lshr_vector_zero(
108; CHECK-NEXT:    ret <2 x i32> [[A:%.*]]
109;
110  %and = and <2 x i32> %b, <i32 64, i32 256>
111  %shr = lshr <2 x i32> %a, %and
112  ret <2 x i32> %shr
113}
114
115; Make sure that weird vector types work too.
; In-range shift amounts for i15 are 0..14 and fit in bits 0..3. The mask
; keeps only bit 10 (1024), so each lane of the amount is either 0 or out of
; range. The shl is expected to fold to %a.
116define <2 x i15> @shl_vector_zero(<2 x i15> %a, <2 x i15> %b) {
117; CHECK-LABEL: @shl_vector_zero(
118; CHECK-NEXT:    ret <2 x i15> [[A:%.*]]
119;
120  %and = and <2 x i15> %b, <i15 1024, i15 1024>
121  %shl = shl <2 x i15> %a, %and
122  ret <2 x i15> %shl
123}
124
; Negative test: the mask 3 limits each lane of the shift amount to 0..3, all
; of which are in range for i32 and not known to be zero. Both the `and` and
; the shl must survive unchanged.
125define <2 x i32> @shl_vector_for_real(<2 x i32> %a, <2 x i32> %b) {
126; CHECK-LABEL: @shl_vector_for_real(
127; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[B:%.*]], <i32 3, i32 3>
128; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[AND]]
129; CHECK-NEXT:    ret <2 x i32> [[SHL]]
130;
131  %and = and <2 x i32> %b, <i32 3, i32 3> ; a necessary mask op
132  %shl = shl <2 x i32> %a, %and
133  ret <2 x i32> %shl
134}
135
136
137; We calculate the valid bits of the shift using log2, and log2 of 1 (the type width) is 0.
138; That should be ok. Either the shift amount is 0 or invalid (1), so we can always return %a.
139
; For i1 the only in-range shift amount is 0; an amount of 1 equals the bit
; width and is out of range. The shl is expected to fold to %a with no mask
; on %b at all.
140define i1 @shl_i1(i1 %a, i1 %b) {
141; CHECK-LABEL: @shl_i1(
142; CHECK-NEXT:    ret i1 [[A:%.*]]
143;
144  %shl = shl i1 %a, %b
145  ret i1 %shl
146}
147
148; The following cases only get folded by InstCombine,
149; see InstCombine/lshr.ll.
150
; Declarations of the count-trailing-zeros (cttz) and count-leading-zeros
; (ctlz) intrinsics, in scalar i32 and <2 x i8> vector forms, used by the
; tests that follow. Per the LLVM LangRef, the i1 operand selects whether a
; zero input yields an undefined/poison result.
151declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
152declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
153declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) nounwind readnone
154declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone
155
; ctlz on i32 with the zero-is-undefined flag set (i1 true), then a logical
; shift right by 5. For a non-zero input the count is at most 31, so bit 5 of
; the count is never set. The expected output records that this pass leaves
; both instructions in place.
156define i32 @lshr_ctlz_zero_is_undef(i32 %x) {
157; CHECK-LABEL: @lshr_ctlz_zero_is_undef(
158; CHECK-NEXT:    [[CT:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true)
159; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[CT]], 5
160; CHECK-NEXT:    ret i32 [[SH]]
161;
162  %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
163  %sh = lshr i32 %ct, 5
164  ret i32 %sh
165}
166
; cttz on i32 with the zero-is-undefined flag set (i1 true), then a logical
; shift right by 5. For a non-zero input the count is at most 31, so bit 5 of
; the count is never set. The expected output records that this pass leaves
; both instructions in place.
167define i32 @lshr_cttz_zero_is_undef(i32 %x) {
168; CHECK-LABEL: @lshr_cttz_zero_is_undef(
169; CHECK-NEXT:    [[CT:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
170; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[CT]], 5
171; CHECK-NEXT:    ret i32 [[SH]]
172;
173  %ct = call i32 @llvm.cttz.i32(i32 %x, i1 true)
174  %sh = lshr i32 %ct, 5
175  ret i32 %sh
176}
177
; Vector form with a splat shift amount: ctlz on <2 x i8> with the
; zero-is-undefined flag set, then each lane is shifted right by 3. For a
; non-zero i8 lane the count is at most 7, so bit 3 is never set. The expected
; output records that this pass leaves both instructions in place.
178define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
179; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec(
180; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
181; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
182; CHECK-NEXT:    ret <2 x i8> [[SH]]
183;
184  %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
185  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
186  ret <2 x i8> %sh
187}
188
; Non-splat shift amount <3, 0>: only lane 0 is shifted by 3, and only lane 0
; is extracted and returned. The expected output records that this pass
; leaves the call, the shift and the extractelement in place.
189define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) {
190; CHECK-LABEL: @lshr_ctlz_zero_is_undef_vec(
191; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
192; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 0>
193; CHECK-NEXT:    [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
194; CHECK-NEXT:    ret i8 [[EX]]
195;
196  %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
197  %sh = lshr <2 x i8> %ct, <i8 3, i8 0>
198  %ex = extractelement <2 x i8> %sh, i32 0
199  ret i8 %ex
200}
201
; Vector form with a splat shift amount: cttz on <2 x i8> with the
; zero-is-undefined flag set, then each lane is shifted right by 3. For a
; non-zero i8 lane the count is at most 7, so bit 3 is never set. The expected
; output records that this pass leaves both instructions in place.
202define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
203; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec(
204; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
205; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
206; CHECK-NEXT:    ret <2 x i8> [[SH]]
207;
208  %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
209  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
210  ret <2 x i8> %sh
211}
212
; Non-splat shift amount <3, 0>: only lane 0 is shifted by 3, and only lane 0
; is extracted and returned. The expected output records that this pass
; leaves the call, the shift and the extractelement in place.
213define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) {
214; CHECK-LABEL: @lshr_cttz_zero_is_undef_vec(
215; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
216; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 0>
217; CHECK-NEXT:    [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
218; CHECK-NEXT:    ret i8 [[EX]]
219;
220  %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
221  %sh = lshr <2 x i8> %ct, <i8 3, i8 0>
222  %ex = extractelement <2 x i8> %sh, i32 0
223  ret i8 %ex
224}
225
226