1;; rewrites for integer and floating-point arithmetic 2;; eg: `iadd`, `isub`, `ineg`, `imul`, `fadd`, `fsub`, `fmul` 3 4;; For commutative instructions, we depend on cprop.isle pushing immediates to 5;; the right, and thus only simplify patterns like `x+0`, not `0+x`. 6 7;; x+0 == x. 8(rule (simplify (iadd ty 9 x 10 (iconst_u ty 0))) 11 (subsume x)) 12;; x-0 == x. 13(rule (simplify (isub ty 14 x 15 (iconst_u ty 0))) 16 (subsume x)) 17;; 0-x == (ineg x). 18(rule (simplify (isub ty 19 (iconst_u ty 0) 20 x)) 21 (ineg ty x)) 22 23;; x + -y == -y + x == -(y - x) == x - y 24(rule (simplify (iadd ty x (ineg ty y))) 25 (isub ty x y)) 26(rule (simplify (iadd ty (ineg ty y) x)) 27 (isub ty x y)) 28(rule (simplify (ineg ty (isub ty y x))) 29 (isub ty x y)) 30;; x - -y == x + y 31(rule (simplify (isub ty x (ineg ty y))) 32 (iadd ty x y)) 33 34;; ineg(ineg(x)) == x. 35(rule (simplify (ineg ty (ineg ty x))) (subsume x)) 36 37;; ineg(x) * ineg(y) == x*y. 38(rule (simplify (imul ty (ineg ty x) (ineg ty y))) 39 (subsume (imul ty x y))) 40 41;; iabs(ineg(x)) == iabs(x). 42(rule (simplify (iabs ty (ineg ty x))) 43 (iabs ty x)) 44 45;; iabs(iabs(x)) == iabs(x). 46(rule (simplify (iabs ty inner @ (iabs ty x))) 47 (subsume inner)) 48 49;; x-x == 0. 50(rule (simplify (isub (ty_int ty) x x)) (subsume (iconst_u ty 0))) 51 52;; x*1 == x. 53(rule (simplify (imul ty 54 x 55 (iconst_u ty 1))) 56 (subsume x)) 57 58;; x*0 == 0. 59(rule (simplify (imul ty 60 _ 61 zero @ (iconst_u ty 0))) 62 (subsume zero)) 63 64;; x*-1 == ineg(x). 65(rule (simplify (imul ty x (iconst_s ty -1))) 66 (ineg ty x)) 67 68;; (!x) + 1 == ineg(x) 69(rule (simplify (iadd ty (bnot ty x) (iconst_u ty 1))) 70 (ineg ty x)) 71 72;; !(x - 1) == !(x + (-1)) == ineg(x) 73(rule (simplify (bnot ty (isub ty x (iconst_s ty 1)))) 74 (ineg ty x)) 75(rule (simplify (bnot ty (iadd ty x (iconst_s ty -1)))) 76 (ineg ty x)) 77 78;; x/1 == x. 79(rule (simplify (sdiv ty 80 x 81 (iconst_u ty 1))) 82 (subsume x)) 83(rule (simplify (udiv ty 84 x 85 (iconst_u ty 1))) 86 (subsume x)) 87 88;; TODO: strength reduction: div to shifts 89;; TODO: div/rem by constants -> magic multiplications 90 91;; x*2 == x+x. 92(rule (simplify (imul ty x (iconst_u _ 2))) 93 (iadd ty x x)) 94 95;; x*c == x<<log2(c) when c is a power of two. 96;; Note that the type of `iconst` must be the same as the type of `imul`, 97;; so these rules can only fire in situations where it's safe to construct an 98;; `iconst` of that type. 99(rule (simplify (imul ty x (iconst _ (imm64_power_of_two c)))) 100 (ishl ty x (iconst ty (imm64 c)))) 101(rule (simplify (imul ty (iconst _ (imm64_power_of_two c)) x)) 102 (ishl ty x (iconst ty (imm64 c)))) 103 104;; fneg(fneg(x)) == x. 105(rule (simplify (fneg ty (fneg ty x))) (subsume x)) 106 107;; If both of the multiplied arguments to an `fma` are negated then remove 108;; both of them since they cancel out. 109(rule (simplify (fma ty (fneg ty x) (fneg ty y) z)) 110 (fma ty x y z)) 111 112;; If both of the multiplied arguments to an `fmul` are negated then remove 113;; both of them since they cancel out. 114(rule (simplify (fmul ty (fneg ty x) (fneg ty y))) 115 (fmul ty x y)) 116 117;; (a op (b op (c op d))) ==> ((a op b) op (c op d)) 118;; 119;; and 120;; 121;; (((a op b) op c) op d) ==> ((a op b) op (c op d)) 122;; 123;; where `op` is an associative operation: `iadd`, `imul`, `band`, or `bxor`. 124;; 125;; This increases instruction-level parallelism and shrinks live ranges. It also 126;; canonicalizes into the shallow-and-wide form for reassociating constants 127;; together for cprop. 128;; 129;; NB: We subsume to avoid exponential e-node blow up due to reassociating very 130;; large chains of operations. 131;; 132;; TODO: We should add `bor` rules for this as well. Unfortunately, they 133;; conflict with our `bswap` recognizing rules when we `subsume`. 134 135(rule (simplify (iadd ty a (iadd ty b (iadd ty c d)))) 136 (subsume (iadd ty (iadd ty a b) (iadd ty c d)))) 137(rule (simplify (iadd ty (iadd ty (iadd ty a b) c) d)) 138 (subsume (iadd ty (iadd ty a b) (iadd ty c d)))) 139 140(rule (simplify (imul ty a (imul ty b (imul ty c d)))) 141 (subsume (imul ty (imul ty a b) (imul ty c d)))) 142(rule (simplify (imul ty (imul ty (imul ty a b) c) d)) 143 (subsume (imul ty (imul ty a b) (imul ty c d)))) 144 145(rule (simplify (band ty a (band ty b (band ty c d)))) 146 (subsume (band ty (band ty a b) (band ty c d)))) 147(rule (simplify (band ty (band ty (band ty a b) c) d)) 148 (subsume (band ty (band ty a b) (band ty c d)))) 149 150(rule (simplify (bxor ty a (bxor ty b (bxor ty c d)))) 151 (subsume (bxor ty (bxor ty a b) (bxor ty c d)))) 152(rule (simplify (bxor ty (bxor ty (bxor ty a b) c) d)) 153 (subsume (bxor ty (bxor ty a b) (bxor ty c d)))) 154 155 156;; Similar rules but for associating combinations of + and - 157 158;; a -(b-(c-d)) = (a-b) + (c-d) 159(rule (simplify (isub ty a (isub ty b (isub ty c d)))) 160 (subsume (iadd ty (isub ty a b) (isub ty c d)))) 161 162;; a -(b-(c+d)) = (a-b) + (c+d) 163(rule (simplify (isub ty a (isub ty b (iadd ty c d)))) 164 (subsume (iadd ty (isub ty a b) (iadd ty c d)))) 165 166;; a -(b+(c-d)) = (a-b) - (c-d) 167(rule (simplify (isub ty a (iadd ty b (isub ty c d)))) 168 (subsume (isub ty (isub ty a b) (isub ty c d)))) 169 170;; a -(b+(c+d)) = (a-b) - (c+d) 171(rule (simplify (isub ty a (iadd ty b (iadd ty c d)))) 172 (subsume (isub ty (isub ty a b) (iadd ty c d)))) 173 174;; a +(b-(c-d)) = (a+b) - (c-d) 175(rule (simplify (iadd ty a (isub ty b (isub ty c d)))) 176 (subsume (isub ty (iadd ty a b) (isub ty c d)))) 177 178;; a +(b-(c+d)) = (a+b) - (c+d) 179(rule (simplify (iadd ty a (isub ty b (iadd ty c d)))) 180 (subsume (isub ty (iadd ty a b) (iadd ty c d)))) 181 182;; a +(b+(c-d)) = (a+b) + (c-d) 183(rule (simplify (iadd ty a (iadd ty b (isub ty c d)))) 184 (subsume (iadd ty (iadd ty a b) (isub ty c d)))) 185 186;; and nested the other way 187 188;; ((a-b)-c)-d = (a-b) - (c+d) 189(rule (simplify (isub ty (isub ty (isub ty a b) c) d)) 190 (subsume (isub ty (isub ty a b) (iadd ty c d)))) 191 192;; ((a-b)-c)+d = (a-b) - (c-d) 193(rule (simplify (iadd ty (isub ty (isub ty a b) c) d)) 194 (subsume (isub ty (isub ty a b) (isub ty c d)))) 195 196;; ((a-b)+c)-d = (a-b) + (c-d) 197(rule (simplify (isub ty (iadd ty (isub ty a b) c) d)) 198 (subsume (iadd ty (isub ty a b) (isub ty c d)))) 199 200;; ((a-b)+c)+d = (a-b) + (c+d) 201(rule (simplify (iadd ty (iadd ty (isub ty a b) c) d)) 202 (subsume (iadd ty (isub ty a b) (iadd ty c d)))) 203 204;; ((a+b)-c)-d = (a+b) - (c+d) 205(rule (simplify (isub ty (isub ty (iadd ty a b) c) d)) 206 (subsume (isub ty (iadd ty a b) (iadd ty c d)))) 207 208;; ((a+b)-c)+d = (a+b) - (c-d) 209(rule (simplify (iadd ty (isub ty (iadd ty a b) c) d)) 210 (subsume (isub ty (iadd ty a b) (isub ty c d)))) 211 212;; ((a+b)+c)-d = (a+b) + (c-d) 213(rule (simplify (isub ty (iadd ty (iadd ty a b) c) d)) 214 (subsume (iadd ty (iadd ty a b) (isub ty c d)))) 215 216;; Detect people open-coding `mulhi`: (x as big * y as big) >> bits 217;; LLVM doesn't have an intrinsic for it, so you'll see it in code like 218;; <https://github.com/rust-lang/rust/blob/767453eb7ca188e991ac5568c17b984dd4893e77/library/core/src/num/mod.rs#L174-L180> 219(rule (simplify (sshr ty (imul ty (sextend _ x@(value_type half_ty)) 220 (sextend _ y@(value_type half_ty))) 221 (iconst_u _ k))) 222 (if-let true (ty_equal half_ty (ty_half_width ty))) 223 (if-let true (u64_eq k (ty_bits_u64 half_ty))) 224 (sextend ty (smulhi half_ty x y))) 225(rule (simplify (ushr ty (imul ty (uextend _ x@(value_type half_ty)) 226 (uextend _ y@(value_type half_ty))) 227 (iconst_u _ k))) 228 (if-let true (ty_equal half_ty (ty_half_width ty))) 229 (if-let true (u64_eq k (ty_bits_u64 half_ty))) 230 (uextend ty (umulhi half_ty x y))) 231 232;; Cranelift's `fcvt_from_{u,s}int` instructions are polymorphic over the input 233;; type so remove any unnecessary `uextend` or `sextend` to give backends 234;; the chance to convert from the smallest integral type to the float. This 235;; can help lowerings on x64 for example which has a less efficient u64-to-float 236;; conversion than other bit widths. 237(rule (simplify (fcvt_from_uint ty (uextend _ val))) 238 (fcvt_from_uint ty val)) 239(rule (simplify (fcvt_from_sint ty (sextend _ val))) 240 (fcvt_from_sint ty val)) 241