1;; rewrites for integer and floating-point arithmetic
2;; eg: `iadd`, `isub`, `ineg`, `imul`, `fadd`, `fsub`, `fmul`
3
4;; For commutative instructions, we depend on cprop.isle pushing immediates to
5;; the right, and thus only simplify patterns like `x+0`, not `0+x`.
6
7;; x+0 == x.
8(rule (simplify (iadd ty
9                      x
10                      (iconst_u ty 0)))
11      (subsume x))
12;; x-0 == x.
13(rule (simplify (isub ty
14                      x
15                      (iconst_u ty 0)))
16      (subsume x))
17;; 0-x == (ineg x).
18(rule (simplify (isub ty
19                      (iconst_u ty 0)
20                      x))
21      (ineg ty x))
22
23;; x + -y == -y + x == -(y - x) == x - y
24(rule (simplify (iadd ty x (ineg ty y)))
25      (isub ty x y))
26(rule (simplify (iadd ty (ineg ty y) x))
27      (isub ty x y))
28(rule (simplify (ineg ty (isub ty y x)))
29      (isub ty x y))
30;; x - -y == x + y
31(rule (simplify (isub ty x (ineg ty y)))
32      (iadd ty x y))
33
34;; ineg(ineg(x)) == x.
35(rule (simplify (ineg ty (ineg ty x))) (subsume x))
36
37;; ineg(x) * ineg(y) == x*y.
38(rule (simplify (imul ty (ineg ty x) (ineg ty y)))
39      (subsume (imul ty x y)))
40
41;; iabs(ineg(x)) == iabs(x).
42(rule (simplify (iabs ty (ineg ty x)))
43      (iabs ty x))
44
45;; iabs(iabs(x)) == iabs(x).
46(rule (simplify (iabs ty inner @ (iabs ty x)))
47      (subsume inner))
48
49;; x-x == 0.
50(rule (simplify (isub (ty_int ty) x x)) (subsume (iconst_u ty 0)))
51
52;; x*1 == x.
53(rule (simplify (imul ty
54                      x
55                      (iconst_u ty 1)))
56      (subsume x))
57
58;; x*0 == 0.
59(rule (simplify (imul ty
60                      _
61                      zero @ (iconst_u ty 0)))
62      (subsume zero))
63
64;; x*-1 == ineg(x).
65(rule (simplify (imul ty x (iconst_s ty -1)))
66      (ineg ty x))
67
68;; (!x) + 1 == ineg(x)
69(rule (simplify (iadd ty (bnot ty x) (iconst_u ty 1)))
70      (ineg ty x))
71
72;; !(x - 1) == !(x + (-1)) == ineg(x)
73(rule (simplify (bnot ty (isub ty x (iconst_s ty 1))))
74      (ineg ty x))
75(rule (simplify (bnot ty (iadd ty x (iconst_s ty -1))))
76      (ineg ty x))
77
78;; x / 1 == x.
79(rule (simplify_skeleton (sdiv x (iconst_s ty 1))) x)
80(rule (simplify_skeleton (udiv x (iconst_u ty 1))) x)
81
82;; Unsigned `x / d == x >> ilog2(d)` when d is a power of two.
83(rule (simplify_skeleton (udiv x (iconst_u ty (u64_extract_power_of_two d))))
84      (ushr ty x (iconst_u ty (u64_ilog2 d))))
85
86;; Signed `x / d` when d is a power of two is a bit more involved...
87(rule (simplify_skeleton (sdiv x (iconst_u ty (u64_extract_power_of_two d))))
88      (if-let true (u64_gt d 1))
89      ;; This rule musn't fire for the most negative number - which looks like
90      ;; a power of two (sign bit set and otherwise all zeros)
91      (if-let true (u32_lt (u64_trailing_zeros d)
92                           (u32_sub (ty_bits ty) 1)))
93      (let ((k u32 (u64_trailing_zeros d))
94            (t1 Value (sshr ty x (iconst_u ty (u32_sub k 1))))
95            (t2 Value (ushr ty t1 (iconst_u ty (u32_sub (ty_bits ty) k))))
96            (t3 Value (iadd ty x t2))
97            (t4 Value (sshr ty t3 (iconst_s ty k))))
98        t4))
99
100;; And signed `x / d` when d is a negative power of two is the same, but with a
101;; negation.
102(rule (simplify_skeleton (sdiv x (iconst_s ty d)))
103      (if-let true (i64_is_negative_power_of_two d))
104      (if-let true (i64_ne d -1))
105      (let ((k u32 (i64_trailing_zeros d))
106            (t1 Value (sshr ty x (iconst_u ty (u32_sub k 1))))
107            (t2 Value (ushr ty t1 (iconst_u ty (u32_sub (ty_bits ty) k))))
108            (t3 Value (iadd ty x t2))
109            (t4 Value (sshr ty t3 (iconst_s ty k)))
110            (t5 Value (ineg ty t4)))
111        t5))
112
113;; General cases for `udiv` with constant divisors.
114(rule (simplify_skeleton (udiv x (iconst_u $I32 (u64_extract_non_zero (u32_from_u64 d)))))
115      (if-let false (u32_is_power_of_two d))
116      (apply_div_const_magic_u32 (Opcode.Udiv) x d))
117(rule (simplify_skeleton (udiv x (iconst_u $I64 (u64_extract_non_zero d))))
118      (if-let false (u64_is_power_of_two d))
119      (apply_div_const_magic_u64 (Opcode.Udiv) x d))
120
121;; General cases for `sdiv` with constant divisors.
122(rule (simplify_skeleton (sdiv x (iconst_s $I32 (i64_extract_non_zero (i32_from_i64 d)))))
123      (if-let false (i64_is_any_sign_power_of_two d))
124      (apply_div_const_magic_s32 (Opcode.Sdiv) x d))
125(rule (simplify_skeleton (sdiv x (iconst_s $I64 (i64_extract_non_zero d))))
126      (if-let false (i64_is_any_sign_power_of_two d))
127      (apply_div_const_magic_s64 (Opcode.Sdiv) x d))
128
129;; x % 1 == 0
130(rule (simplify_skeleton (urem x (iconst_u ty 1))) (iconst_u ty 0))
131(rule (simplify_skeleton (srem x (iconst_u ty 1))) (iconst_u ty 0))
132(rule (simplify_skeleton (srem x (iconst_s ty -1))) (iconst_u ty 0))
133
134;; Unsigned `x % d == x & ((1 << ilog2(d)) - 1)` when `d` is a power of two.
135(rule (simplify_skeleton (urem x (iconst_u ty (u64_extract_power_of_two d))))
136      (if-let true (u64_gt d 1))
137      (let ((mask Value (iconst_u ty (u64_sub (u64_shl 1 (u64_ilog2 d)) 1))))
138        (band ty x mask)))
139
140;; Signed `x % d` when `d` is a (possibly negative) power of two is a little
141;; more complicated.
142(rule (simplify_skeleton (srem x d_val @ (iconst_s ty d)))
143      ;; Interestingly, this same sequence works for both positive and negative
144      ;; powers of two.
145      (if-let true (i64_is_any_sign_power_of_two d))
146      (if-let true (i64_ne d 1))
147      (if-let true (i64_ne d -1))
148      (let ((k u32 (i64_trailing_zeros d))
149            (t1 Value (sshr ty x (iconst_u ty (u32_sub k 1))))
150            (t2 Value (ushr ty t1 (iconst_u ty (u32_sub (ty_bits ty) k))))
151            (t3 Value (iadd ty x t2))
152            (t4 Value (band ty t3 (iconst_s ty (i64_wrapping_neg (i64_shl 1 k)))))
153            (t5 Value (isub ty x t4)))
154        t5))
155
156;; General cases for `urem` with constant divisors.
157(rule (simplify_skeleton (urem x (iconst_u $I32 (u64_extract_non_zero (u32_from_u64 d)))))
158      (if-let false (u32_is_power_of_two d))
159      (apply_div_const_magic_u32 (Opcode.Urem) x d))
160(rule (simplify_skeleton (urem x (iconst_u $I64 (u64_extract_non_zero d))))
161      (if-let false (u64_is_power_of_two d))
162      (apply_div_const_magic_u64 (Opcode.Urem) x d))
163
164;; General cases for `srem` with constant divisors.
165(rule (simplify_skeleton (srem x (iconst_s $I32 (i64_extract_non_zero (i32_from_i64 d)))))
166      (if-let false (i64_is_any_sign_power_of_two d))
167      (apply_div_const_magic_s32 (Opcode.Srem) x d))
168(rule (simplify_skeleton (srem x (iconst_s $I64 (i64_extract_non_zero d))))
169      (if-let false (i64_is_any_sign_power_of_two d))
170      (apply_div_const_magic_s64 (Opcode.Srem) x d))
171
172;; x*2 == x+x.
173(rule (simplify (imul ty x (iconst_u _ 2)))
174      (iadd ty x x))
175
176;; x*c == x<<log2(c) when c is a power of two.
177;;
178;; Note that the type of `iconst` must be the same as the type of `imul`,
179;; so these rules can only fire in situations where it's safe to construct an
180;; `iconst` of that type.
181(rule (simplify (imul ty x (iconst _ (imm64_power_of_two c))))
182      (ishl ty x (iconst ty (imm64 c))))
183(rule (simplify (imul ty (iconst _ (imm64_power_of_two c)) x))
184      (ishl ty x (iconst ty (imm64 c))))
185
186;; fneg(fneg(x)) == x.
187(rule (simplify (fneg ty (fneg ty x))) (subsume x))
188
189;; If both of the multiplied arguments to an `fma` are negated then remove
190;; both of them since they cancel out.
191(rule (simplify (fma ty (fneg ty x) (fneg ty y) z))
192      (fma ty x y z))
193
194;; If both of the multiplied arguments to an `fmul` are negated then remove
195;; both of them since they cancel out.
196(rule (simplify (fmul ty (fneg ty x) (fneg ty y)))
197      (fmul ty x y))
198
199;; Detect people open-coding `mulhi`: (x as big * y as big) >> bits
200;; LLVM doesn't have an intrinsic for it, so you'll see it in code like
201;; <https://github.com/rust-lang/rust/blob/767453eb7ca188e991ac5568c17b984dd4893e77/library/core/src/num/mod.rs#L174-L180>
202(rule (simplify (sshr ty (imul ty (sextend _ x@(value_type half_ty))
203                                  (sextend _ y@(value_type half_ty)))
204                         (iconst_u _ k)))
205      (if-let true (ty_equal half_ty (ty_half_width ty)))
206      (if-let true (u64_eq k (ty_bits_u64 half_ty)))
207      (sextend ty (smulhi half_ty x y)))
208(rule (simplify (ushr ty (imul ty (uextend _ x@(value_type half_ty))
209                                  (uextend _ y@(value_type half_ty)))
210                         (iconst_u _ k)))
211      (if-let true (ty_equal half_ty (ty_half_width ty)))
212      (if-let true (u64_eq k (ty_bits_u64 half_ty)))
213      (uextend ty (umulhi half_ty x y)))
214
215;; Cranelift's `fcvt_from_{u,s}int` instructions are polymorphic over the input
216;; type so remove any unnecessary `uextend` or `sextend` to give backends
217;; the chance to convert from the smallest integral type to the float. This
218;; can help lowerings on x64 for example which has a less efficient u64-to-float
219;; conversion than other bit widths.
220(rule (simplify (fcvt_from_uint ty (uextend _ val)))
221      (fcvt_from_uint ty val))
222(rule (simplify (fcvt_from_sint ty (sextend _ val)))
223      (fcvt_from_sint ty val))
224
225
226;; or(x, C) + (-C)  -->  and(x, ~C)
227(rule
228  (simplify (iadd ty
229              (bor ty x (iconst_s ty n))
230              (iconst_s ty m)))
231  (if-let m (i64_checked_neg n))
232  (band ty x (iconst ty (imm64_masked ty (i64_cast_unsigned (i64_not n))))))
233
234;; (x + y) - (x | y) --> x & y
235(rule (simplify (isub ty (iadd ty x y) (bor ty x y))) (band ty x y))
236
237;; x * (1 << y) == x << y
238(rule (simplify (imul ty x (ishl ty (iconst_s ty 1) y))) (ishl ty x y))
239
240;; (x - y) + x --> x
241(rule (simplify (iadd ty (isub ty x y) y)) x)
242(rule (simplify (iadd ty y (isub ty x y))) x)
243
244;; (x + y) - y --> x
245(rule (simplify (isub ty (iadd ty x y) x)) y)
246(rule (simplify (isub ty (iadd ty x y) y)) x)
247
248;; (x - y) - x => -y
249(rule (simplify (isub ty (isub ty x y) x))(ineg ty y))
250
251;; (x * C) (==/!=) D --> x (==/!=) (D / C) when C is odd and divides D
252(rule
253  (simplify (ne ty (iconst_u ty1 x) (imul ty1 y (iconst_u ty1 z))))
254  (if-let 0 (u64_checked_rem x z))
255  (if-let 1 (u64_rem z 2))
256  (ne ty y (iconst ty1 (imm64 (u64_div x z)))))
257(rule
258  (simplify (ne ty (iconst_u ty1 x) (imul ty1 (iconst_u ty1 y) z)))
259  (if-let 0 (u64_checked_rem x y))
260  (if-let 1 (u64_rem y 2))
261  (ne ty z (iconst ty1 (imm64 (u64_div x y)))))
262(rule
263  (simplify (ne ty (imul ty1 x (iconst_u ty1 y)) (iconst_u ty1 z)))
264  (if-let 0 (u64_checked_rem z y))
265  (if-let 1 (u64_rem y 2))
266  (ne ty x (iconst ty1 (imm64 (u64_div z y)))))
267(rule
268  (simplify (ne ty (imul ty1 (iconst_u ty1 x) y) (iconst_u ty1 z)))
269  (if-let 0 (u64_checked_rem z x))
270  (if-let 1 (u64_rem x 2))
271  (ne ty y (iconst ty1 (imm64 (u64_div z x)))))
272
273
274(rule
275  (simplify (eq ty (iconst_u ty1 x) (imul ty1 y (iconst_u ty1 z))))
276  (if-let 0 (u64_checked_rem x z))
277  (if-let 1 (u64_rem z 2))
278  (eq ty y (iconst ty1 (imm64 (u64_div x z)))))
279(rule
280  (simplify (eq ty (iconst_u ty1 x) (imul ty1 (iconst_u ty1 y) z)))
281  (if-let 0 (u64_checked_rem x y))
282  (if-let 1 (u64_rem y 2))
283  (eq ty z (iconst ty1 (imm64 (u64_div x y)))))
284(rule
285  (simplify (eq ty (imul ty1 x (iconst_u ty1 y)) (iconst_u ty1 z)))
286  (if-let 0 (u64_checked_rem z y))
287  (if-let 1 (u64_rem y 2))
288  (eq ty x (iconst ty1 (imm64 (u64_div z y)))))
289(rule
290  (simplify (eq ty (imul ty1 (iconst_u ty1 x) y) (iconst_u ty1 z)))
291  (if-let 0 (u64_checked_rem z x))
292  (if-let 1 (u64_rem x 2))
293  (eq ty y (iconst ty1 (imm64 (u64_div z x)))))
294