; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -O2 -expand-reductions -mattr=avx -S < %s | FileCheck %s

; Test if SLP vector reduction patterns are recognized
; and optionally converted to reduction intrinsics and
; back to raw IR.

target triple = "x86_64--"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Integer add reduction: a 4-iteration scalar loop sums p[0..3] starting
; from 0. The expected output is one <4 x i32> load followed by a
; log2 shuffle/add sequence and a final extract of lane 0.
define i32 @add_v4i32(i32* %p) #0 {
; CHECK-LABEL: @add_v4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[BIN_RDX4]], i32 0
; CHECK-NEXT:    ret i32 [[TMP2]]
;
entry:
  br label %for.cond

for.cond:
  %r.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, 4
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  br label %for.end

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
  %add = add nsw i32 %r.0, %0
  br label %for.inc

for.inc:
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret i32 %r.0
}

; Integer multiply reduction: an 8-iteration loop multiplies p[0..7]
; starting from 1. The scalar source widens both operands to i32,
; multiplies, and truncates back to i16; the expected output multiplies
; directly in <8 x i16>.
define signext i16 @mul_v8i16(i16* %p) #0 {
; CHECK-LABEL: @mul_v8i16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[P:%.*]] to <8 x i16>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2, !tbaa [[TBAA4:![0-9]+]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <8 x i16> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX4:%.*]] = mul <8 x i16> [[BIN_RDX]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX4]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX6:%.*]] = mul <8 x i16> [[BIN_RDX4]], [[RDX_SHUF5]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i16> [[BIN_RDX6]], i32 0
; CHECK-NEXT:    ret i16 [[TMP2]]
;
entry:
  br label %for.cond

for.cond:
  %r.0 = phi i16 [ 1, %entry ], [ %conv2, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, 8
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  br label %for.end

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %idxprom
  %0 = load i16, i16* %arrayidx, align 2, !tbaa !7
  %conv = sext i16 %0 to i32
  %conv1 = sext i16 %r.0 to i32
  %mul = mul nsw i32 %conv1, %conv
  %conv2 = trunc i32 %mul to i16
  br label %for.inc

for.inc:
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret i16 %r.0
}

; Bitwise-or reduction: a 16-iteration loop ORs p[0..15] starting from 0.
; The scalar source widens to i32 and truncates back to i8; the expected
; output performs the OR directly in <16 x i8>.
define signext i8 @or_v16i8(i8* %p) #0 {
; CHECK-LABEL: @or_v16i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P:%.*]] to <16 x i8>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1, !tbaa [[TBAA6:![0-9]+]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <16 x i8> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX5:%.*]] = or <16 x i8> [[BIN_RDX]], [[RDX_SHUF4]]
; CHECK-NEXT:    [[RDX_SHUF6:%.*]] = shufflevector <16 x i8> [[BIN_RDX5]], <16 x i8> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX7:%.*]] = or <16 x i8> [[BIN_RDX5]], [[RDX_SHUF6]]
; CHECK-NEXT:    [[RDX_SHUF8:%.*]] = shufflevector <16 x i8> [[BIN_RDX7]], <16 x i8> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX9:%.*]] = or <16 x i8> [[BIN_RDX7]], [[RDX_SHUF8]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[BIN_RDX9]], i32 0
; CHECK-NEXT:    ret i8 [[TMP2]]
;
entry:
  br label %for.cond

for.cond:
  %r.0 = phi i8 [ 0, %entry ], [ %conv2, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, 16
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  br label %for.end

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %idxprom
  %0 = load i8, i8* %arrayidx, align 1, !tbaa !9
  %conv = sext i8 %0 to i32
  %conv1 = sext i8 %r.0 to i32
  %or = or i32 %conv1, %conv
  %conv2 = trunc i32 %or to i8
  br label %for.inc

for.inc:
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret i8 %r.0
}

; Signed-minimum reduction: a 4-iteration loop keeps p[i] whenever
; p[i] < r (signed), starting from 2147483647. The source expresses the
; choice as a branch diamond that reloads p[i]; the expected output is a
; vector icmp slt / select sequence.
define i32 @smin_v4i32(i32* %p) #0 {
; CHECK-LABEL: @smin_v4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = icmp slt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
; CHECK-NEXT:    ret i32 [[TMP2]]
;
entry:
  br label %for.cond

for.cond:
  %r.0 = phi i32 [ 2147483647, %entry ], [ %cond, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, 4
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  br label %for.end

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
  %cmp1 = icmp slt i32 %0, %r.0
  br i1 %cmp1, label %cond.true, label %cond.false

cond.true:
  %idxprom2 = sext i32 %i.0 to i64
  %arrayidx3 = getelementptr inbounds i32, i32* %p, i64 %idxprom2
  %1 = load i32, i32* %arrayidx3, align 4, !tbaa !3
  br label %cond.end

cond.false:
  br label %cond.end

cond.end:
  %cond = phi i32 [ %1, %cond.true ], [ %r.0, %cond.false ]
  br label %for.inc

for.inc:
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret i32 %r.0
}

; Unsigned-maximum reduction: same shape as @smin_v4i32 but the loop
; compares with icmp ugt and starts from 0; the expected output is a
; vector icmp ugt / select sequence.
define i32 @umax_v4i32(i32* %p) #0 {
; CHECK-LABEL: @umax_v4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = icmp ugt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
; CHECK-NEXT:    ret i32 [[TMP2]]
;
entry:
  br label %for.cond

for.cond:
  %r.0 = phi i32 [ 0, %entry ], [ %cond, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, 4
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  br label %for.end

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
  %cmp1 = icmp ugt i32 %0, %r.0
  br i1 %cmp1, label %cond.true, label %cond.false

cond.true:
  %idxprom2 = sext i32 %i.0 to i64
  %arrayidx3 = getelementptr inbounds i32, i32* %p, i64 %idxprom2
  %1 = load i32, i32* %arrayidx3, align 4, !tbaa !3
  br label %cond.end

cond.false:
  br label %cond.end

cond.end:
  %cond = phi i32 [ %1, %cond.true ], [ %r.0, %cond.false ]
  br label %for.inc

for.inc:
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret i32 %r.0
}

; Fast-math fadd reduction over four floats with a start value of 42.0.
; In the expected output the start value is added to the scalar result
; after the vector shuffle reduction.
define float @fadd_v4i32(float* %p) #0 {
; CHECK-LABEL: @fadd_v4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7:![0-9]+]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT:    [[BIN_RDX5:%.*]] = fadd fast float 4.200000e+01, [[TMP2]]
; CHECK-NEXT:    ret float [[BIN_RDX5]]
;
entry:
  br label %for.cond

for.cond:
  %r.0 = phi float [ 4.200000e+01, %entry ], [ %add, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, 4
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  br label %for.end

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
  %0 = load float, float* %arrayidx, align 4, !tbaa !10
  %add = fadd fast float %r.0, %0
  br label %for.inc

for.inc:
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret float %r.0
}

; Fast-math fmul reduction over four floats with a start value of 42.0.
; The expected output multiplies the reduced scalar by 1.0 and then by
; the 42.0 start value as a separate trailing fmul.
define float @fmul_v4i32(float* %p) #0 {
; CHECK-LABEL: @fmul_v4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT:    [[BIN_RDX5:%.*]] = fmul fast float 1.000000e+00, [[TMP2]]
; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fmul fast float [[BIN_RDX5]], 4.200000e+01
; CHECK-NEXT:    ret float [[OP_EXTRA]]
;
entry:
  br label %for.cond

for.cond:
  %r.0 = phi float [ 4.200000e+01, %entry ], [ %mul, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, 4
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  br label %for.end

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
  %0 = load float, float* %arrayidx, align 4, !tbaa !10
  %mul = fmul fast float %r.0, %0
  br label %for.inc

for.inc:
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret float %r.0
}

; Fast-math floating-point minimum reduction: the loop keeps p[i]
; whenever p[i] < r (fcmp fast olt), starting from 0x47EFFFFFE0000000.
; The expected output is a vector fcmp fast olt / select fast sequence.
define float @fmin_v4f32(float* %p) #0 {
; CHECK-LABEL: @fmin_v4f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP1]], <4 x float> [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = fcmp fast olt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP4]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF3]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT5]], i32 0
; CHECK-NEXT:    ret float [[TMP2]]
;
entry:
  br label %for.cond

for.cond:
  %r.0 = phi float [ 0x47EFFFFFE0000000, %entry ], [ %cond, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, 4
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  br label %for.end

for.body:
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
  %0 = load float, float* %arrayidx, align 4, !tbaa !10
  %cmp1 = fcmp fast olt float %0, %r.0
  br i1 %cmp1, label %cond.true, label %cond.false

cond.true:
  %idxprom2 = sext i32 %i.0 to i64
  %arrayidx3 = getelementptr inbounds float, float* %p, i64 %idxprom2
  %1 = load float, float* %arrayidx3, align 4, !tbaa !10
  br label %cond.end

cond.false:
  br label %cond.end

cond.end:
  %cond = phi fast float [ %1, %cond.true ], [ %r.0, %cond.false ]
  br label %for.inc

for.inc:
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret float %r.0
}

; Helper called by @findMax: returns %a when %a > %b (fcmp nnan ninf nsz
; ogt), otherwise %b. It has no expected-output lines of its own.
define available_externally float @max(float %a, float %b) {
entry:
  %a.addr = alloca float, align 4
  %b.addr = alloca float, align 4
  store float %a, float* %a.addr, align 4
  store float %b, float* %b.addr, align 4
  %0 = load float, float* %a.addr, align 4
  %1 = load float, float* %b.addr, align 4
  %cmp = fcmp nnan ninf nsz ogt float %0, %1
  br i1 %cmp, label %cond.true, label %cond.false

cond.true:  ; preds = %entry
  %2 = load float, float* %a.addr, align 4
  br label %cond.end

cond.false:  ; preds = %entry
  %3 = load float, float* %b.addr, align 4
  br label %cond.end

cond.end:  ; preds = %cond.false, %cond.true
  %cond = phi nnan ninf nsz float [ %2, %cond.true ], [ %3, %cond.false ]
  ret float %cond
}

; PR23116
; Chains seven calls to @max across the eight lanes of a byval <8 x float>
; argument. The expected output contains no calls: only a single vector
; load and a three-step fcmp ogt / select shuffle reduction.

define float @findMax(<8 x float>* byval(<8 x float>) align 16 %0) {
; CHECK-LABEL: @findMax(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load <8 x float>, <8 x float>* [[TMP0:%.*]], align 16, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[V]], <8 x float> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[V]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[V]], <8 x float> [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF8:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP9:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF8]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT10:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP9]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF8]]
; CHECK-NEXT:    [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP12:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT10]], [[RDX_SHUF11]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT13:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP12]], <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> [[RDX_SHUF11]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT13]], i32 0
; CHECK-NEXT:    ret float [[TMP1]]
;
entry:
  %v.addr = alloca <8 x float>, align 32
  %v = load <8 x float>, <8 x float>* %0, align 16, !tbaa !3
  store <8 x float> %v, <8 x float>* %v.addr, align 32, !tbaa !3
  %1 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
  %vecext = extractelement <8 x float> %1, i32 0
  %2 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
  %vecext1 = extractelement <8 x float> %2, i32 1
  %call = call nnan ninf nsz float @max(float %vecext, float %vecext1)
  %3 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
  %vecext2 = extractelement <8 x float> %3, i32 2
  %call3 = call nnan ninf nsz float @max(float %call, float %vecext2)
  %4 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
  %vecext4 = extractelement <8 x float> %4, i32 3
  %call5 = call nnan ninf nsz float @max(float %call3, float %vecext4)
  %5 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
  %vecext6 = extractelement <8 x float> %5, i32 4
  %call7 = call nnan ninf nsz float @max(float %call5, float %vecext6)
  %6 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
  %vecext8 = extractelement <8 x float> %6, i32 5
  %call9 = call nnan ninf nsz float @max(float %call7, float %vecext8)
  %7 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
  %vecext10 = extractelement <8 x float> %7, i32 6
  %call11 = call nnan ninf nsz float @max(float %call9, float %vecext10)
  %8 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
  %vecext12 = extractelement <8 x float> %8, i32 7
  %call13 = call nnan ninf nsz float @max(float %call11, float %vecext12)
  ret float %call13
}

attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+avx,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="true" "use-soft-float"="false" }

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git a9fe69c359de653015c39e413e48630d069abe27)"}
!3 = !{!4, !4, i64 0}
!4 = !{!"int", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
!7 = !{!8, !8, i64 0}
!8 = !{!"short", !5, i64 0}
!9 = !{!5, !5, i64 0}
!10 = !{!11, !11, i64 0}
!11 = !{!"float", !5, i64 0}