; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -dce -instcombine -force-vector-interleave=1 -force-vector-width=8 -S < %s | FileCheck %s

; Each loop below carries its reduction in an i32 phi, masks the phi to the
; low 8 or 16 bits at the top of every iteration, extends the loaded element
; to i32, and truncates the final value back to the narrow type after the loop.

; Bitwise AND reduction over 256 i8 elements. The assertions expect the loop
; to be vectorized with an <8 x i8> accumulator.
define i8 @reduction_and_trunc(i8* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_and_trunc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i8> [ <i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i8>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP2]], align 1
; CHECK-NEXT:    [[TMP3]] = and <8 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP3]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[AND_LCSSA_OFF0:%.*]] = phi i8 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i8 [[AND_LCSSA_OFF0]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %and, %for.body ], [ 0, %entry ]
  %sum.02 = and i32 %sum.02p, 255
  %gep = getelementptr inbounds i8, i8* %ptr, i32 %iv
  %load = load i8, i8* %gep
  %ext = zext i8 %load to i32
  %and = and i32 %sum.02, %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %and to i8
  ret i8 %ret
}

; Bitwise OR reduction over 256 i16 elements. The assertions expect the loop
; to be vectorized with an <8 x i16> accumulator.
define i16 @reduction_or_trunc(i16* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_or_trunc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <8 x i16>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP2]], align 2
; CHECK-NEXT:    [[TMP3]] = or <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP5:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP3]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[OR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i16 [[OR_LCSSA_OFF0]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %or, %for.body ], [ 0, %entry ]
  %sum.02 = and i32 %sum.02p, 65535
  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
  %load = load i16, i16* %gep
  %ext = zext i16 %load to i32
  %or = or i32 %sum.02, %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

; Bitwise XOR reduction over 256 i16 elements. The assertions expect the loop
; to be vectorized with an <8 x i16> accumulator.
define i16 @reduction_xor_trunc(i16* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_xor_trunc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <8 x i16>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP2]], align 2
; CHECK-NEXT:    [[TMP3]] = xor <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP5:%.*]] = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[TMP3]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i16 [[XOR_LCSSA_OFF0]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ]
  %sum.02 = and i32 %sum.02p, 65535
  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
  %load = load i16, i16* %gep
  %ext = zext i16 %load to i32
  %xor = xor i32 %sum.02, %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %xor to i16
  ret i16 %ret
}

; Signed-min (icmp slt + select) reduction over 256 i8 elements. The
; assertions expect no vector body and no <8 x ...> type in the output for
; this function.
define i8 @reduction_smin_trunc(i8* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_smin_trunc(
; CHECK-NOT: vector.body
; CHECK-NOT: <8 x
; CHECK: ret
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %min, %for.body ], [ 256, %entry ]
  %sum.02 = and i32 %sum.02p, 255
  %gep = getelementptr inbounds i8, i8* %ptr, i32 %iv
  %load = load i8, i8* %gep
  %ext = sext i8 %load to i32
  %icmp = icmp slt i32 %sum.02, %ext
  %min = select i1 %icmp, i32 %sum.02, i32 %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %min to i8
  ret i8 %ret
}

; Unsigned-min (icmp ult + select) reduction over 256 i8 elements. The
; assertions expect no vector body and no <8 x ...> type in the output for
; this function.
define i8 @reduction_umin_trunc(i8* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_umin_trunc(
; CHECK-NOT: vector.body
; CHECK-NOT: <8 x
; CHECK: ret
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
  %sum.02 = and i32 %sum.02p, 255
  %gep = getelementptr inbounds i8, i8* %ptr, i32 %iv
  %load = load i8, i8* %gep
  %ext = zext i8 %load to i32
  %icmp = icmp ult i32 %sum.02, %ext
  %min = select i1 %icmp, i32 %sum.02, i32 %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %min to i8
  ret i8 %ret
}

; Signed-max (icmp sgt + select) reduction over 256 i16 elements. The
; assertions expect no vector body and no <8 x ...> type in the output for
; this function.
define i16 @reduction_smax_trunc(i16* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_smax_trunc(
; CHECK-NOT: vector.body
; CHECK-NOT: <8 x
; CHECK: ret
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %max, %for.body ], [ 0, %entry ]
  %sum.02 = and i32 %sum.02p, 65535
  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
  %load = load i16, i16* %gep
  %ext = sext i16 %load to i32
  %icmp = icmp sgt i32 %sum.02, %ext
  %max = select i1 %icmp, i32 %sum.02, i32 %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %max to i16
  ret i16 %ret
}

; Unsigned-max (icmp ugt + select) reduction over 256 i16 elements. The
; assertions expect no vector body and no <8 x ...> type in the output for
; this function.
define i16 @reduction_umax_trunc(i16* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_umax_trunc(
; CHECK-NOT: vector.body
; CHECK-NOT: <8 x
; CHECK: ret
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %max, %for.body ], [ 0, %entry ]
  %sum.02 = and i32 %sum.02p, 65535
  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
  %load = load i16, i16* %gep
  %ext = zext i16 %load to i32
  %icmp = icmp ugt i32 %sum.02, %ext
  %max = select i1 %icmp, i32 %sum.02, i32 %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %max to i16
  ret i16 %ret
}