; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instsimplify -S | FileCheck %s

; If any bits of the shift amount are known to make it exceed or equal
; the number of bits in the type, the shift result is poison.

; Bit 5 (value 32) of the amount is forced on, so the amount is always >= 32.
define i32 @shl_amount_is_known_bogus(i32 %a, i32 %b) {
; CHECK-LABEL: @shl_amount_is_known_bogus(
; CHECK-NEXT:    ret i32 poison
;
  %or = or i32 %b, 32
  %shl = shl i32 %a, %or
  ret i32 %shl
}

; Check some weird types and the other shift ops.

; The low five bits of the amount are forced on, so the amount is always >= 31.
define i31 @lshr_amount_is_known_bogus(i31 %a, i31 %b) {
; CHECK-LABEL: @lshr_amount_is_known_bogus(
; CHECK-NEXT:    ret i31 poison
;
  %or = or i31 %b, 31
  %shr = lshr i31 %a, %or
  ret i31 %shr
}

; Bits 0 and 5 of the amount are forced on, so the amount is always >= 33.
define i33 @ashr_amount_is_known_bogus(i33 %a, i33 %b) {
; CHECK-LABEL: @ashr_amount_is_known_bogus(
; CHECK-NEXT:    ret i33 poison
;
  %or = or i33 %b, 33
  %shr = ashr i33 %a, %or
  ret i33 %shr
}


; If all valid bits of the shift amount are known 0, there's no shift.
; It doesn't matter if high bits are set because that would make the result poison.
; Therefore, the only possible valid result of these shifts is %a.

; The mask clears the low four bits, so the amount is either 0 or >= 16.
define i16 @ashr_amount_is_zero(i16 %a, i16 %b) {
; CHECK-LABEL: @ashr_amount_is_zero(
; CHECK-NEXT:    ret i16 [[A:%.*]]
;
  %and = and i16 %b, 65520 ; 0xfff0
  %shr = ashr i16 %a, %and
  ret i16 %shr
}

; Only bit 11 survives the mask, so the amount is either 0 or 2048 (>= 300).
define i300 @lshr_amount_is_zero(i300 %a, i300 %b) {
; CHECK-LABEL: @lshr_amount_is_zero(
; CHECK-NEXT:    ret i300 [[A:%.*]]
;
  %and = and i300 %b, 2048
  %shr = lshr i300 %a, %and
  ret i300 %shr
}

; The mask clears the low four bits, so the amount is either 0 or >= 16 (> 9).
define i9 @shl_amount_is_zero(i9 %a, i9 %b) {
; CHECK-LABEL: @shl_amount_is_zero(
; CHECK-NEXT:    ret i9 [[A:%.*]]
;
  %and = and i9 %b, 496 ; 0x1f0
  %shl = shl i9 %a, %and
  ret i9 %shl
}


; Verify that we've calculated the log2 boundary of valid bits correctly for a weird type.

; The mask keeps bit 3 (value 8), and 8 is a legal shift amount for i9,
; so the shift must not be removed.
define i9 @shl_amount_is_not_known_zero(i9 %a, i9 %b) {
; CHECK-LABEL: @shl_amount_is_not_known_zero(
; CHECK-NEXT:    [[AMT:%.*]] = and i9 [[B:%.*]], -8
; CHECK-NEXT:    [[RES:%.*]] = shl i9 [[A:%.*]], [[AMT]]
; CHECK-NEXT:    ret i9 [[RES]]
;
  %amt = and i9 %b, 504 ; 0x1f8
  %res = shl i9 %a, %amt
  ret i9 %res
}


; For vectors, we need all scalar elements to meet the requirements to optimize.

; Both lanes force bit 5 (value 32) on, so every lane's amount is >= 32.
define <2 x i32> @ashr_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @ashr_vector_bogus(
; CHECK-NEXT:    ret <2 x i32> poison
;
  %amt = or <2 x i32> %b, <i32 32, i32 32>
  %res = ashr <2 x i32> %a, %amt
  ret <2 x i32> %res
}

; FIXME: This is poison (each lane's amount is >= 32), but computeKnownBits
; doesn't handle the union of the differing per-lane constants.
define <2 x i32> @shl_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @shl_vector_bogus(
; CHECK-NEXT:    [[AMT:%.*]] = or <2 x i32> [[B:%.*]], <i32 32, i32 64>
; CHECK-NEXT:    [[RES:%.*]] = shl <2 x i32> [[A:%.*]], [[AMT]]
; CHECK-NEXT:    ret <2 x i32> [[RES]]
;
  %amt = or <2 x i32> %b, <i32 32, i32 64>
  %res = shl <2 x i32> %a, %amt
  ret <2 x i32> %res
}

; Each lane's mask keeps a single bit at or above 32, so the low five
; (valid) bits of every lane's amount are zero.
define <2 x i32> @lshr_vector_zero(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @lshr_vector_zero(
; CHECK-NEXT:    ret <2 x i32> [[A:%.*]]
;
  %amt = and <2 x i32> %b, <i32 64, i32 256>
  %res = lshr <2 x i32> %a, %amt
  ret <2 x i32> %res
}

; Make sure that weird vector types work too.
; Only bit 10 (value 1024) survives the mask in each lane, so each amount
; is either 0 or far beyond the 15-bit width.
define <2 x i15> @shl_vector_zero(<2 x i15> %a, <2 x i15> %b) {
; CHECK-LABEL: @shl_vector_zero(
; CHECK-NEXT:    ret <2 x i15> [[A:%.*]]
;
  %amt = and <2 x i15> %b, <i15 1024, i15 1024>
  %res = shl <2 x i15> %a, %amt
  ret <2 x i15> %res
}

; Negative test: the mask leaves amounts 0..3, all of which are legal,
; so both the mask and the shift must stay.
define <2 x i32> @shl_vector_for_real(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @shl_vector_for_real(
; CHECK-NEXT:    [[AMT:%.*]] = and <2 x i32> [[B:%.*]], <i32 3, i32 3>
; CHECK-NEXT:    [[RES:%.*]] = shl <2 x i32> [[A:%.*]], [[AMT]]
; CHECK-NEXT:    ret <2 x i32> [[RES]]
;
  %amt = and <2 x i32> %b, <i32 3, i32 3> ; a necessary mask op
  %res = shl <2 x i32> %a, %amt
  ret <2 x i32> %res
}


; We calculate the valid bits of the shift using log2, and log2 of 1 (the type width) is 0.
; That should be ok. Either the shift amount is 0 or invalid (1), so we can always return %a.

define i1 @shl_i1(i1 %a, i1 %b) {
; CHECK-LABEL: @shl_i1(
; CHECK-NEXT:    ret i1 [[A:%.*]]
;
  %res = shl i1 %a, %b
  ret i1 %res
}

; The following cases only get folded by InstCombine,
; see InstCombine/lshr.ll.

declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) nounwind readnone
declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone

; Scalar ctlz with the second argument set to true, shifted right by
; log2 of the bit width. The checks expect InstSimplify to leave it alone.
define i32 @lshr_ctlz_zero_is_undef(i32 %x) {
; CHECK-LABEL: @lshr_ctlz_zero_is_undef(
; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[CNT]], 5
; CHECK-NEXT:    ret i32 [[SHIFTED]]
;
  %cnt = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
  %shifted = lshr i32 %cnt, 5
  ret i32 %shifted
}

; Same pattern as above, using cttz instead of ctlz.
define i32 @lshr_cttz_zero_is_undef(i32 %x) {
; CHECK-LABEL: @lshr_cttz_zero_is_undef(
; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[CNT]], 5
; CHECK-NEXT:    ret i32 [[SHIFTED]]
;
  %cnt = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  %shifted = lshr i32 %cnt, 5
  ret i32 %shifted
}

; Vector ctlz with a splat shift amount of 3 (log2 of the i8 width).
define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec(
; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr <2 x i8> [[CNT]], <i8 3, i8 3>
; CHECK-NEXT:    ret <2 x i8> [[SHIFTED]]
;
  %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
  %shifted = lshr <2 x i8> %cnt, <i8 3, i8 3>
  ret <2 x i8> %shifted
}

; Vector ctlz with a non-splat shift amount; only lane 0 (shifted by 3)
; is extracted and returned.
define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_ctlz_zero_is_undef_vec(
; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr <2 x i8> [[CNT]], <i8 3, i8 0>
; CHECK-NEXT:    [[LANE0:%.*]] = extractelement <2 x i8> [[SHIFTED]], i32 0
; CHECK-NEXT:    ret i8 [[LANE0]]
;
  %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
  %shifted = lshr <2 x i8> %cnt, <i8 3, i8 0>
  %lane0 = extractelement <2 x i8> %shifted, i32 0
  ret i8 %lane0
}

; Vector cttz with a splat shift amount of 3 (log2 of the i8 width).
define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec(
; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr <2 x i8> [[CNT]], <i8 3, i8 3>
; CHECK-NEXT:    ret <2 x i8> [[SHIFTED]]
;
  %cnt = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
  %shifted = lshr <2 x i8> %cnt, <i8 3, i8 3>
  ret <2 x i8> %shifted
}

; Vector cttz with a non-splat shift amount; only lane 0 (shifted by 3)
; is extracted and returned.
define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_cttz_zero_is_undef_vec(
; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr <2 x i8> [[CNT]], <i8 3, i8 0>
; CHECK-NEXT:    [[LANE0:%.*]] = extractelement <2 x i8> [[SHIFTED]], i32 0
; CHECK-NEXT:    ret i8 [[LANE0]]
;
  %cnt = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
  %shifted = lshr <2 x i8> %cnt, <i8 3, i8 0>
  %lane0 = extractelement <2 x i8> %shifted, i32 0
  ret i8 %lane0
}
