; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -loop-vectorize -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -loop-vectorize -mcpu=knl -S | FileCheck %s -check-prefix=AVX512

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc_linux"

; The source code:
;
;void foo1(int *A, int *B, int *trigger) {
;
;  for (int i=0; i<10000; i++) {
;    if (trigger[i] < 100) {
;      A[i] = B[i] + trigger[i];
;    }
;  }
;}

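; A reading note (not a FileCheck assertion): the checks below verify that the
; guarded access is turned into llvm.masked.load / llvm.masked.store calls
; whose mask comes from vectorizing the trigger comparison. Roughly, with
; illustrative names and a VF of 8:
;
;   %mask = icmp slt <8 x i32> %wide.trigger, <i32 100, i32 100, ...>
;   %b.vec = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %b.ptr, i32 4, <8 x i1> %mask, <8 x i32> poison)
;   call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %sum, <8 x i32>* %a.ptr, i32 4, <8 x i1> %mask)
;
; Per the assertions: AVX1 vectorizes at VF=8 without interleaving, AVX2 at
; VF=8 interleaved by 4 (vector trip count 9984), and AVX512 at VF=16
; interleaved by 4 plus an 8-wide vector epilogue, all behind runtime
; pointer-difference checks of A against trigger and B.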
define void @foo1(i32* nocapture %A, i32* nocapture readonly %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 {
; AVX1-LABEL: @foo1(
; AVX1-NEXT:  entry:
; AVX1-NEXT:    [[B3:%.*]] = ptrtoint i32* [[B:%.*]] to i64
; AVX1-NEXT:    [[TRIGGER2:%.*]] = ptrtoint i32* [[TRIGGER:%.*]] to i64
; AVX1-NEXT:    [[A1:%.*]] = ptrtoint i32* [[A:%.*]] to i64
; AVX1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX1:       vector.memcheck:
; AVX1-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]]
; AVX1-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; AVX1-NEXT:    [[TMP1:%.*]] = sub i64 [[A1]], [[B3]]
; AVX1-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; AVX1-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; AVX1-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX1:       vector.ph:
; AVX1-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX1:       vector.body:
; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
; AVX1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0
; AVX1-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <8 x i32>*
; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP5]], align 4
; AVX1-NEXT:    [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP2]]
; AVX1-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[TMP7]], i32 0
; AVX1-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>*
; AVX1-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP9]], i32 4, <8 x i1> [[TMP6]], <8 x i32> poison)
; AVX1-NEXT:    [[TMP10:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX1-NEXT:    [[TMP11:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP2]]
; AVX1-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i32 0
; AVX1-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
; AVX1-NEXT:    call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP10]], <8 x i32>* [[TMP13]], i32 4, <8 x i1> [[TMP6]])
; AVX1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; AVX1-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; AVX1-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; AVX1:       middle.block:
; AVX1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 10000
; AVX1-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX1:       scalar.ph:
; AVX1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX1-NEXT:    br label [[FOR_BODY:%.*]]
; AVX1:       for.body:
; AVX1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX1-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP15]], 100
; AVX1-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX1:       if.then:
; AVX1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
; AVX1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
; AVX1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX7]], align 4
; AVX1-NEXT:    br label [[FOR_INC]]
; AVX1:       for.inc:
; AVX1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; AVX1:       for.end:
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @foo1(
; AVX2-NEXT:  entry:
; AVX2-NEXT:    [[B3:%.*]] = ptrtoint i32* [[B:%.*]] to i64
; AVX2-NEXT:    [[TRIGGER2:%.*]] = ptrtoint i32* [[TRIGGER:%.*]] to i64
; AVX2-NEXT:    [[A1:%.*]] = ptrtoint i32* [[A:%.*]] to i64
; AVX2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX2:       vector.memcheck:
; AVX2-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]]
; AVX2-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128
; AVX2-NEXT:    [[TMP1:%.*]] = sub i64 [[A1]], [[B3]]
; AVX2-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 128
; AVX2-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; AVX2-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX2:       vector.ph:
; AVX2-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX2:       vector.body:
; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
; AVX2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 8
; AVX2-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 16
; AVX2-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 24
; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP4]]
; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP5]]
; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
; AVX2-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4
; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 8
; AVX2-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4
; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 16
; AVX2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4
; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 24
; AVX2-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD7:%.*]] = load <8 x i32>, <8 x i32>* [[TMP17]], align 4
; AVX2-NEXT:    [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP20:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP21:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP22:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP24:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP4]]
; AVX2-NEXT:    [[TMP25:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP5]]
; AVX2-NEXT:    [[TMP26:%.*]] = getelementptr i32, i32* [[TMP22]], i32 0
; AVX2-NEXT:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP27]], i32 4, <8 x i1> [[TMP18]], <8 x i32> poison)
; AVX2-NEXT:    [[TMP28:%.*]] = getelementptr i32, i32* [[TMP22]], i32 8
; AVX2-NEXT:    [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP29]], i32 4, <8 x i1> [[TMP19]], <8 x i32> poison)
; AVX2-NEXT:    [[TMP30:%.*]] = getelementptr i32, i32* [[TMP22]], i32 16
; AVX2-NEXT:    [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP31]], i32 4, <8 x i1> [[TMP20]], <8 x i32> poison)
; AVX2-NEXT:    [[TMP32:%.*]] = getelementptr i32, i32* [[TMP22]], i32 24
; AVX2-NEXT:    [[TMP33:%.*]] = bitcast i32* [[TMP32]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD10:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP33]], i32 4, <8 x i1> [[TMP21]], <8 x i32> poison)
; AVX2-NEXT:    [[TMP34:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX2-NEXT:    [[TMP35:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD8]], [[WIDE_LOAD5]]
; AVX2-NEXT:    [[TMP36:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]]
; AVX2-NEXT:    [[TMP37:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]]
; AVX2-NEXT:    [[TMP38:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP39:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP40:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP4]]
; AVX2-NEXT:    [[TMP41:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP5]]
; AVX2-NEXT:    [[TMP42:%.*]] = getelementptr i32, i32* [[TMP38]], i32 0
; AVX2-NEXT:    [[TMP43:%.*]] = bitcast i32* [[TMP42]] to <8 x i32>*
; AVX2-NEXT:    call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP34]], <8 x i32>* [[TMP43]], i32 4, <8 x i1> [[TMP18]])
; AVX2-NEXT:    [[TMP44:%.*]] = getelementptr i32, i32* [[TMP38]], i32 8
; AVX2-NEXT:    [[TMP45:%.*]] = bitcast i32* [[TMP44]] to <8 x i32>*
; AVX2-NEXT:    call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP35]], <8 x i32>* [[TMP45]], i32 4, <8 x i1> [[TMP19]])
; AVX2-NEXT:    [[TMP46:%.*]] = getelementptr i32, i32* [[TMP38]], i32 16
; AVX2-NEXT:    [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <8 x i32>*
; AVX2-NEXT:    call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP36]], <8 x i32>* [[TMP47]], i32 4, <8 x i1> [[TMP20]])
; AVX2-NEXT:    [[TMP48:%.*]] = getelementptr i32, i32* [[TMP38]], i32 24
; AVX2-NEXT:    [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <8 x i32>*
; AVX2-NEXT:    call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP37]], <8 x i32>* [[TMP49]], i32 4, <8 x i1> [[TMP21]])
; AVX2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AVX2-NEXT:    [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX2-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; AVX2:       middle.block:
; AVX2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX2:       scalar.ph:
; AVX2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX2-NEXT:    br label [[FOR_BODY:%.*]]
; AVX2:       for.body:
; AVX2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP51]], 100
; AVX2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX2:       if.then:
; AVX2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
; AVX2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP52]], [[TMP51]]
; AVX2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX7]], align 4
; AVX2-NEXT:    br label [[FOR_INC]]
; AVX2:       for.inc:
; AVX2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; AVX2:       for.end:
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @foo1(
; AVX512-NEXT:  iter.check:
; AVX512-NEXT:    [[B3:%.*]] = ptrtoint i32* [[B:%.*]] to i64
; AVX512-NEXT:    [[TRIGGER2:%.*]] = ptrtoint i32* [[TRIGGER:%.*]] to i64
; AVX512-NEXT:    [[A1:%.*]] = ptrtoint i32* [[A:%.*]] to i64
; AVX512-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX512:       vector.memcheck:
; AVX512-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]]
; AVX512-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 256
; AVX512-NEXT:    [[TMP1:%.*]] = sub i64 [[A1]], [[B3]]
; AVX512-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 256
; AVX512-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; AVX512-NEXT:    br i1 [[CONFLICT_RDX]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; AVX512:       vector.main.loop.iter.check:
; AVX512-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX512:       vector.ph:
; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX512:       vector.body:
; AVX512-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
; AVX512-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 16
; AVX512-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 32
; AVX512-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 48
; AVX512-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP4]]
; AVX512-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP5]]
; AVX512-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
; AVX512-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP11]], align 4
; AVX512-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 16
; AVX512-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i32>, <16 x i32>* [[TMP13]], align 4
; AVX512-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 32
; AVX512-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD6:%.*]] = load <16 x i32>, <16 x i32>* [[TMP15]], align 4
; AVX512-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 48
; AVX512-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD7:%.*]] = load <16 x i32>, <16 x i32>* [[TMP17]], align 4
; AVX512-NEXT:    [[TMP18:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP19:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP20:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP21:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP22:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP23:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP24:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP4]]
; AVX512-NEXT:    [[TMP25:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP5]]
; AVX512-NEXT:    [[TMP26:%.*]] = getelementptr i32, i32* [[TMP22]], i32 0
; AVX512-NEXT:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP27]], i32 4, <16 x i1> [[TMP18]], <16 x i32> poison)
; AVX512-NEXT:    [[TMP28:%.*]] = getelementptr i32, i32* [[TMP22]], i32 16
; AVX512-NEXT:    [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP29]], i32 4, <16 x i1> [[TMP19]], <16 x i32> poison)
; AVX512-NEXT:    [[TMP30:%.*]] = getelementptr i32, i32* [[TMP22]], i32 32
; AVX512-NEXT:    [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP31]], i32 4, <16 x i1> [[TMP20]], <16 x i32> poison)
; AVX512-NEXT:    [[TMP32:%.*]] = getelementptr i32, i32* [[TMP22]], i32 48
; AVX512-NEXT:    [[TMP33:%.*]] = bitcast i32* [[TMP32]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD10:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP33]], i32 4, <16 x i1> [[TMP21]], <16 x i32> poison)
; AVX512-NEXT:    [[TMP34:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX512-NEXT:    [[TMP35:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD8]], [[WIDE_LOAD5]]
; AVX512-NEXT:    [[TMP36:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]]
; AVX512-NEXT:    [[TMP37:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]]
; AVX512-NEXT:    [[TMP38:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP39:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP40:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP4]]
; AVX512-NEXT:    [[TMP41:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP5]]
; AVX512-NEXT:    [[TMP42:%.*]] = getelementptr i32, i32* [[TMP38]], i32 0
; AVX512-NEXT:    [[TMP43:%.*]] = bitcast i32* [[TMP42]] to <16 x i32>*
; AVX512-NEXT:    call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> [[TMP34]], <16 x i32>* [[TMP43]], i32 4, <16 x i1> [[TMP18]])
; AVX512-NEXT:    [[TMP44:%.*]] = getelementptr i32, i32* [[TMP38]], i32 16
; AVX512-NEXT:    [[TMP45:%.*]] = bitcast i32* [[TMP44]] to <16 x i32>*
; AVX512-NEXT:    call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> [[TMP35]], <16 x i32>* [[TMP45]], i32 4, <16 x i1> [[TMP19]])
; AVX512-NEXT:    [[TMP46:%.*]] = getelementptr i32, i32* [[TMP38]], i32 32
; AVX512-NEXT:    [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <16 x i32>*
; AVX512-NEXT:    call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> [[TMP36]], <16 x i32>* [[TMP47]], i32 4, <16 x i1> [[TMP20]])
; AVX512-NEXT:    [[TMP48:%.*]] = getelementptr i32, i32* [[TMP38]], i32 48
; AVX512-NEXT:    [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <16 x i32>*
; AVX512-NEXT:    call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> [[TMP37]], <16 x i32>* [[TMP49]], i32 4, <16 x i1> [[TMP21]])
; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
; AVX512-NEXT:    [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX512-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; AVX512:       middle.block:
; AVX512-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; AVX512:       vec.epilog.iter.check:
; AVX512-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; AVX512:       vec.epilog.ph:
; AVX512-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AVX512-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; AVX512:       vec.epilog.vector.body:
; AVX512-NEXT:    [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; AVX512-NEXT:    [[TMP51:%.*]] = add i64 [[OFFSET_IDX]], 0
; AVX512-NEXT:    [[TMP52:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP51]]
; AVX512-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[TMP52]], i32 0
; AVX512-NEXT:    [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <8 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD13:%.*]] = load <8 x i32>, <8 x i32>* [[TMP54]], align 4
; AVX512-NEXT:    [[TMP55:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD13]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP56:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP51]]
; AVX512-NEXT:    [[TMP57:%.*]] = getelementptr i32, i32* [[TMP56]], i32 0
; AVX512-NEXT:    [[TMP58:%.*]] = bitcast i32* [[TMP57]] to <8 x i32>*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD14:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP58]], i32 4, <8 x i1> [[TMP55]], <8 x i32> poison)
; AVX512-NEXT:    [[TMP59:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD14]], [[WIDE_LOAD13]]
; AVX512-NEXT:    [[TMP60:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP51]]
; AVX512-NEXT:    [[TMP61:%.*]] = getelementptr i32, i32* [[TMP60]], i32 0
; AVX512-NEXT:    [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <8 x i32>*
; AVX512-NEXT:    call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP59]], <8 x i32>* [[TMP62]], i32 4, <8 x i1> [[TMP55]])
; AVX512-NEXT:    [[INDEX_NEXT15]] = add nuw i64 [[OFFSET_IDX]], 8
; AVX512-NEXT:    [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT15]], 10000
; AVX512-NEXT:    br i1 [[TMP63]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; AVX512:       vec.epilog.middle.block:
; AVX512-NEXT:    [[CMP_N11:%.*]] = icmp eq i64 10000, 10000
; AVX512-NEXT:    br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
; AVX512:       vec.epilog.scalar.ph:
; AVX512-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; AVX512-NEXT:    br label [[FOR_BODY:%.*]]
; AVX512:       for.body:
; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP64:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX512-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP64]], 100
; AVX512-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX512:       if.then:
; AVX512-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP65:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
; AVX512-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP65]], [[TMP64]]
; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX7]], align 4
; AVX512-NEXT:    br label [[FOR_INC]]
; AVX512:       for.inc:
; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX512-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; AVX512:       for.end:
; AVX512-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp slt i32 %0, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx3 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx3, align 4
  %add = add nsw i32 %1, %0
  %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 %add, i32* %arrayidx7, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret void
}

; The same as @foo1 but all the pointers are address space 1 pointers.

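; The checks for this variant expect the same structure as @foo1, but with the
; address-space-1 intrinsic overloads, e.g. @llvm.masked.load.v8i32.p1v8i32
; and @llvm.masked.store.v8i32.p1v8i32 on <8 x i32> addrspace(1)* operands.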
define void @foo1_addrspace1(i32 addrspace(1)* nocapture %A, i32 addrspace(1)* nocapture readonly %B, i32 addrspace(1)* nocapture readonly %trigger) local_unnamed_addr #0 {
; AVX1-LABEL: @foo1_addrspace1(
; AVX1-NEXT:  entry:
; AVX1-NEXT:    [[B3:%.*]] = ptrtoint i32 addrspace(1)* [[B:%.*]] to i64
; AVX1-NEXT:    [[TRIGGER2:%.*]] = ptrtoint i32 addrspace(1)* [[TRIGGER:%.*]] to i64
; AVX1-NEXT:    [[A1:%.*]] = ptrtoint i32 addrspace(1)* [[A:%.*]] to i64
; AVX1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX1:       vector.memcheck:
; AVX1-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]]
; AVX1-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; AVX1-NEXT:    [[TMP1:%.*]] = sub i64 [[A1]], [[B3]]
; AVX1-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; AVX1-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; AVX1-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX1:       vector.ph:
; AVX1-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX1:       vector.body:
; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
; AVX1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP2]]
; AVX1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP3]], i32 0
; AVX1-NEXT:    [[TMP5:%.*]] = bitcast i32 addrspace(1)* [[TMP4]] to <8 x i32> addrspace(1)*
; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP5]], align 4
; AVX1-NEXT:    [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 [[TMP2]]
; AVX1-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP7]], i32 0
; AVX1-NEXT:    [[TMP9:%.*]] = bitcast i32 addrspace(1)* [[TMP8]] to <8 x i32> addrspace(1)*
; AVX1-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP9]], i32 4, <8 x i1> [[TMP6]], <8 x i32> poison)
; AVX1-NEXT:    [[TMP10:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX1-NEXT:    [[TMP11:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 [[TMP2]]
; AVX1-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP11]], i32 0
; AVX1-NEXT:    [[TMP13:%.*]] = bitcast i32 addrspace(1)* [[TMP12]] to <8 x i32> addrspace(1)*
; AVX1-NEXT:    call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP10]], <8 x i32> addrspace(1)* [[TMP13]], i32 4, <8 x i1> [[TMP6]])
; AVX1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; AVX1-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; AVX1-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; AVX1:       middle.block:
; AVX1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 10000
; AVX1-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX1:       scalar.ph:
; AVX1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX1-NEXT:    br label [[FOR_BODY:%.*]]
; AVX1:       for.body:
; AVX1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    [[TMP15:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX]], align 4
; AVX1-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP15]], 100
; AVX1-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX1:       if.then:
; AVX1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    [[TMP16:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX3]], align 4
; AVX1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
; AVX1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    store i32 [[ADD]], i32 addrspace(1)* [[ARRAYIDX7]], align 4
; AVX1-NEXT:    br label [[FOR_INC]]
; AVX1:       for.inc:
; AVX1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; AVX1:       for.end:
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @foo1_addrspace1(
; AVX2-NEXT:  entry:
; AVX2-NEXT:    [[B3:%.*]] = ptrtoint i32 addrspace(1)* [[B:%.*]] to i64
; AVX2-NEXT:    [[TRIGGER2:%.*]] = ptrtoint i32 addrspace(1)* [[TRIGGER:%.*]] to i64
; AVX2-NEXT:    [[A1:%.*]] = ptrtoint i32 addrspace(1)* [[A:%.*]] to i64
; AVX2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX2:       vector.memcheck:
; AVX2-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]]
; AVX2-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128
; AVX2-NEXT:    [[TMP1:%.*]] = sub i64 [[A1]], [[B3]]
; AVX2-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 128
; AVX2-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; AVX2-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX2:       vector.ph:
; AVX2-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX2:       vector.body:
; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
; AVX2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 8
; AVX2-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 16
; AVX2-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 24
; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP4]]
; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP5]]
; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP6]], i32 0
; AVX2-NEXT:    [[TMP11:%.*]] = bitcast i32 addrspace(1)* [[TMP10]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP11]], align 4
; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP6]], i32 8
; AVX2-NEXT:    [[TMP13:%.*]] = bitcast i32 addrspace(1)* [[TMP12]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP13]], align 4
; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP6]], i32 16
; AVX2-NEXT:    [[TMP15:%.*]] = bitcast i32 addrspace(1)* [[TMP14]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP15]], align 4
; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP6]], i32 24
; AVX2-NEXT:    [[TMP17:%.*]] = bitcast i32 addrspace(1)* [[TMP16]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    [[WIDE_LOAD7:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP17]], align 4
; AVX2-NEXT:    [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP20:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP21:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP22:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP24:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 [[TMP4]]
; AVX2-NEXT:    [[TMP25:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 [[TMP5]]
; AVX2-NEXT:    [[TMP26:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP22]], i32 0
; AVX2-NEXT:    [[TMP27:%.*]] = bitcast i32 addrspace(1)* [[TMP26]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP27]], i32 4, <8 x i1> [[TMP18]], <8 x i32> poison)
; AVX2-NEXT:    [[TMP28:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP22]], i32 8
; AVX2-NEXT:    [[TMP29:%.*]] = bitcast i32 addrspace(1)* [[TMP28]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP29]], i32 4, <8 x i1> [[TMP19]], <8 x i32> poison)
; AVX2-NEXT:    [[TMP30:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP22]], i32 16
; AVX2-NEXT:    [[TMP31:%.*]] = bitcast i32 addrspace(1)* [[TMP30]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP31]], i32 4, <8 x i1> [[TMP20]], <8 x i32> poison)
; AVX2-NEXT:    [[TMP32:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP22]], i32 24
; AVX2-NEXT:    [[TMP33:%.*]] = bitcast i32 addrspace(1)* [[TMP32]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD10:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP33]], i32 4, <8 x i1> [[TMP21]], <8 x i32> poison)
; AVX2-NEXT:    [[TMP34:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX2-NEXT:    [[TMP35:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD8]], [[WIDE_LOAD5]]
; AVX2-NEXT:    [[TMP36:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]]
; AVX2-NEXT:    [[TMP37:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]]
; AVX2-NEXT:    [[TMP38:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP39:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP40:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 [[TMP4]]
; AVX2-NEXT:    [[TMP41:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 [[TMP5]]
; AVX2-NEXT:    [[TMP42:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP38]], i32 0
; AVX2-NEXT:    [[TMP43:%.*]] = bitcast i32 addrspace(1)* [[TMP42]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP34]], <8 x i32> addrspace(1)* [[TMP43]], i32 4, <8 x i1> [[TMP18]])
; AVX2-NEXT:    [[TMP44:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP38]], i32 8
; AVX2-NEXT:    [[TMP45:%.*]] = bitcast i32 addrspace(1)* [[TMP44]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP35]], <8 x i32> addrspace(1)* [[TMP45]], i32 4, <8 x i1> [[TMP19]])
; AVX2-NEXT:    [[TMP46:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP38]], i32 16
; AVX2-NEXT:    [[TMP47:%.*]] = bitcast i32 addrspace(1)* [[TMP46]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP36]], <8 x i32> addrspace(1)* [[TMP47]], i32 4, <8 x i1> [[TMP20]])
; AVX2-NEXT:    [[TMP48:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP38]], i32 24
; AVX2-NEXT:    [[TMP49:%.*]] = bitcast i32 addrspace(1)* [[TMP48]] to <8 x i32> addrspace(1)*
; AVX2-NEXT:    call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP37]], <8 x i32> addrspace(1)* [[TMP49]], i32 4, <8 x i1> [[TMP21]])
; AVX2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AVX2-NEXT:    [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX2-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; AVX2:       middle.block:
; AVX2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX2:       scalar.ph:
; AVX2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX2-NEXT:    br label [[FOR_BODY:%.*]]
; AVX2:       for.body:
; AVX2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    [[TMP51:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX]], align 4
; AVX2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP51]], 100
; AVX2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX2:       if.then:
; AVX2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    [[TMP52:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX3]], align 4
; AVX2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP52]], [[TMP51]]
; AVX2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    store i32 [[ADD]], i32 addrspace(1)* [[ARRAYIDX7]], align 4
; AVX2-NEXT:    br label [[FOR_INC]]
; AVX2:       for.inc:
; AVX2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; AVX2:       for.end:
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @foo1_addrspace1(
; AVX512-NEXT:  iter.check:
; AVX512-NEXT:    [[B3:%.*]] = ptrtoint i32 addrspace(1)* [[B:%.*]] to i64
; AVX512-NEXT:    [[TRIGGER2:%.*]] = ptrtoint i32 addrspace(1)* [[TRIGGER:%.*]] to i64
; AVX512-NEXT:    [[A1:%.*]] = ptrtoint i32 addrspace(1)* [[A:%.*]] to i64
; AVX512-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX512:       vector.memcheck:
; AVX512-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]]
; AVX512-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 256
; AVX512-NEXT:    [[TMP1:%.*]] = sub i64 [[A1]], [[B3]]
; AVX512-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 256
; AVX512-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; AVX512-NEXT:    br i1 [[CONFLICT_RDX]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; AVX512:       vector.main.loop.iter.check:
; AVX512-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX512:       vector.ph:
; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX512:       vector.body:
; AVX512-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
; AVX512-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 16
; AVX512-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 32
; AVX512-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 48
; AVX512-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP4]]
; AVX512-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP5]]
; AVX512-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP6]], i32 0
; AVX512-NEXT:    [[TMP11:%.*]] = bitcast i32 addrspace(1)* [[TMP10]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32> addrspace(1)* [[TMP11]], align 4
; AVX512-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP6]], i32 16
; AVX512-NEXT:    [[TMP13:%.*]] = bitcast i32 addrspace(1)* [[TMP12]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i32>, <16 x i32> addrspace(1)* [[TMP13]], align 4
; AVX512-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP6]], i32 32
; AVX512-NEXT:    [[TMP15:%.*]] = bitcast i32 addrspace(1)* [[TMP14]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    [[WIDE_LOAD6:%.*]] = load <16 x i32>, <16 x i32> addrspace(1)* [[TMP15]], align 4
; AVX512-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP6]], i32 48
; AVX512-NEXT:    [[TMP17:%.*]] = bitcast i32 addrspace(1)* [[TMP16]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    [[WIDE_LOAD7:%.*]] = load <16 x i32>, <16 x i32> addrspace(1)* [[TMP17]], align 4
; AVX512-NEXT:    [[TMP18:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP19:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP20:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP21:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP22:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP23:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP24:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 [[TMP4]]
; AVX512-NEXT:    [[TMP25:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 [[TMP5]]
; AVX512-NEXT:    [[TMP26:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP22]], i32 0
; AVX512-NEXT:    [[TMP27:%.*]] = bitcast i32 addrspace(1)* [[TMP26]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP27]], i32 4, <16 x i1> [[TMP18]], <16 x i32> poison)
; AVX512-NEXT:    [[TMP28:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP22]], i32 16
; AVX512-NEXT:    [[TMP29:%.*]] = bitcast i32 addrspace(1)* [[TMP28]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP29]], i32 4, <16 x i1> [[TMP19]], <16 x i32> poison)
; AVX512-NEXT:    [[TMP30:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP22]], i32 32
; AVX512-NEXT:    [[TMP31:%.*]] = bitcast i32 addrspace(1)* [[TMP30]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP31]], i32 4, <16 x i1> [[TMP20]], <16 x i32> poison)
; AVX512-NEXT:    [[TMP32:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP22]], i32 48
; AVX512-NEXT:    [[TMP33:%.*]] = bitcast i32 addrspace(1)* [[TMP32]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD10:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1v16i32(<16 x i32> addrspace(1)* [[TMP33]], i32 4, <16 x i1> [[TMP21]], <16 x i32> poison)
; AVX512-NEXT:    [[TMP34:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_LOAD]]
; AVX512-NEXT:    [[TMP35:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD8]], [[WIDE_LOAD5]]
; AVX512-NEXT:    [[TMP36:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]]
; AVX512-NEXT:    [[TMP37:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]]
; AVX512-NEXT:    [[TMP38:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP39:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP40:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 [[TMP4]]
; AVX512-NEXT:    [[TMP41:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 [[TMP5]]
; AVX512-NEXT:    [[TMP42:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP38]], i32 0
; AVX512-NEXT:    [[TMP43:%.*]] = bitcast i32 addrspace(1)* [[TMP42]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    call void @llvm.masked.store.v16i32.p1v16i32(<16 x i32> [[TMP34]], <16 x i32> addrspace(1)* [[TMP43]], i32 4, <16 x i1> [[TMP18]])
; AVX512-NEXT:    [[TMP44:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP38]], i32 16
; AVX512-NEXT:    [[TMP45:%.*]] = bitcast i32 addrspace(1)* [[TMP44]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    call void @llvm.masked.store.v16i32.p1v16i32(<16 x i32> [[TMP35]], <16 x i32> addrspace(1)* [[TMP45]], i32 4, <16 x i1> [[TMP19]])
; AVX512-NEXT:    [[TMP46:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP38]], i32 32
; AVX512-NEXT:    [[TMP47:%.*]] = bitcast i32 addrspace(1)* [[TMP46]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    call void @llvm.masked.store.v16i32.p1v16i32(<16 x i32> [[TMP36]], <16 x i32> addrspace(1)* [[TMP47]], i32 4, <16 x i1> [[TMP20]])
; AVX512-NEXT:    [[TMP48:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP38]], i32 48
; AVX512-NEXT:    [[TMP49:%.*]] = bitcast i32 addrspace(1)* [[TMP48]] to <16 x i32> addrspace(1)*
; AVX512-NEXT:    call void @llvm.masked.store.v16i32.p1v16i32(<16 x i32> [[TMP37]], <16 x i32> addrspace(1)* [[TMP49]], i32 4, <16 x i1> [[TMP21]])
; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
; AVX512-NEXT:    [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX512-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; AVX512:       middle.block:
; AVX512-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; AVX512:       vec.epilog.iter.check:
; AVX512-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; AVX512:       vec.epilog.ph:
; AVX512-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AVX512-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; AVX512:       vec.epilog.vector.body:
; AVX512-NEXT:    [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; AVX512-NEXT:    [[TMP51:%.*]] = add i64 [[OFFSET_IDX]], 0
; AVX512-NEXT:    [[TMP52:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[TMP51]]
; AVX512-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP52]], i32 0
; AVX512-NEXT:    [[TMP54:%.*]] = bitcast i32 addrspace(1)* [[TMP53]] to <8 x i32> addrspace(1)*
; AVX512-NEXT:    [[WIDE_LOAD13:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[TMP54]], align 4
; AVX512-NEXT:    [[TMP55:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD13]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX512-NEXT:    [[TMP56:%.*]] = getelementptr i32, i32 addrspace(1)* [[B]], i64 [[TMP51]]
; AVX512-NEXT:    [[TMP57:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP56]], i32 0
; AVX512-NEXT:    [[TMP58:%.*]] = bitcast i32 addrspace(1)* [[TMP57]] to <8 x i32> addrspace(1)*
; AVX512-NEXT:    [[WIDE_MASKED_LOAD14:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1v8i32(<8 x i32> addrspace(1)* [[TMP58]], i32 4, <8 x i1> [[TMP55]], <8 x i32> poison)
; AVX512-NEXT:    [[TMP59:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD14]], [[WIDE_LOAD13]]
; AVX512-NEXT:    [[TMP60:%.*]] = getelementptr i32, i32 addrspace(1)* [[A]], i64 [[TMP51]]
; AVX512-NEXT:    [[TMP61:%.*]] = getelementptr i32, i32 addrspace(1)* [[TMP60]], i32 0
; AVX512-NEXT:    [[TMP62:%.*]] = bitcast i32 addrspace(1)* [[TMP61]] to <8 x i32> addrspace(1)*
; AVX512-NEXT:    call void @llvm.masked.store.v8i32.p1v8i32(<8 x i32> [[TMP59]], <8 x i32> addrspace(1)* [[TMP62]], i32 4, <8 x i1> [[TMP55]])
; AVX512-NEXT:    [[INDEX_NEXT15]] = add nuw i64 [[OFFSET_IDX]], 8
; AVX512-NEXT:    [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT15]], 10000
; AVX512-NEXT:    br i1 [[TMP63]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; AVX512:       vec.epilog.middle.block:
; AVX512-NEXT:    [[CMP_N11:%.*]] = icmp eq i64 10000, 10000
; AVX512-NEXT:    br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
; AVX512:       vec.epilog.scalar.ph:
; AVX512-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; AVX512-NEXT:    br label [[FOR_BODY:%.*]]
; AVX512:       for.body:
; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP64:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX]], align 4
; AVX512-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP64]], 100
; AVX512-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX512:       if.then:
; AVX512-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[B]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP65:%.*]] = load i32, i32 addrspace(1)* [[ARRAYIDX3]], align 4
; AVX512-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP65]], [[TMP64]]
; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[A]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    store i32 [[ADD]], i32 addrspace(1)* [[ARRAYIDX7]], align 4
; AVX512-NEXT:    br label [[FOR_INC]]
; AVX512:       for.inc:
; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX512-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; AVX512:       for.end:
; AVX512-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %trigger, i64 %indvars.iv
  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
  %cmp1 = icmp slt i32 %0, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %B, i64 %indvars.iv
  %1 = load i32, i32 addrspace(1)* %arrayidx3, align 4
  %add = add nsw i32 %1, %0
  %arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %A, i64 %indvars.iv
  store i32 %add, i32 addrspace(1)* %arrayidx7, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret void
}

; The source code:
;
;void foo2(float *A, float *B, int *trigger) {
;
;  for (int i=0; i<10000; i++) {
;    if (trigger[i] < 100) {
;      A[i] = B[i] + trigger[i];
;    }
;  }
;}

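; For the float version the checks expect the f32 overloads (e.g.
; @llvm.masked.load.v8f32.p0v8f32), with the trigger element widened by
; sitofp and combined by fadd instead of the integer add used in @foo1.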
define void @foo2(float* nocapture %A, float* nocapture readonly %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 {
; AVX1-LABEL: @foo2(
; AVX1-NEXT:  entry:
; AVX1-NEXT:    [[B3:%.*]] = ptrtoint float* [[B:%.*]] to i64
; AVX1-NEXT:    [[TRIGGER2:%.*]] = ptrtoint i32* [[TRIGGER:%.*]] to i64
; AVX1-NEXT:    [[A1:%.*]] = ptrtoint float* [[A:%.*]] to i64
; AVX1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX1:       vector.memcheck:
; AVX1-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]]
; AVX1-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; AVX1-NEXT:    [[TMP1:%.*]] = sub i64 [[A1]], [[B3]]
; AVX1-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; AVX1-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; AVX1-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX1:       vector.ph:
; AVX1-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX1:       vector.body:
; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
; AVX1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0
; AVX1-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <8 x i32>*
; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP5]], align 4
; AVX1-NEXT:    [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr float, float* [[B]], i64 [[TMP2]]
; AVX1-NEXT:    [[TMP8:%.*]] = getelementptr float, float* [[TMP7]], i32 0
; AVX1-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <8 x float>*
; AVX1-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP9]], i32 4, <8 x i1> [[TMP6]], <8 x float> poison)
; AVX1-NEXT:    [[TMP10:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
; AVX1-NEXT:    [[TMP11:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP10]]
; AVX1-NEXT:    [[TMP12:%.*]] = getelementptr float, float* [[A]], i64 [[TMP2]]
; AVX1-NEXT:    [[TMP13:%.*]] = getelementptr float, float* [[TMP12]], i32 0
; AVX1-NEXT:    [[TMP14:%.*]] = bitcast float* [[TMP13]] to <8 x float>*
; AVX1-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP11]], <8 x float>* [[TMP14]], i32 4, <8 x i1> [[TMP6]])
; AVX1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; AVX1-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; AVX1-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; AVX1:       middle.block:
; AVX1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 10000
; AVX1-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX1:       scalar.ph:
; AVX1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX1-NEXT:    br label [[FOR_BODY:%.*]]
; AVX1:       for.body:
; AVX1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX1-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP16]], 100
; AVX1-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX1:       if.then:
; AVX1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4
; AVX1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP16]] to float
; AVX1-NEXT:    [[ADD:%.*]] = fadd float [[TMP17]], [[CONV]]
; AVX1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
; AVX1-NEXT:    store float [[ADD]], float* [[ARRAYIDX7]], align 4
; AVX1-NEXT:    br label [[FOR_INC]]
; AVX1:       for.inc:
; AVX1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; AVX1:       for.end:
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @foo2(
; AVX2-NEXT:  entry:
; AVX2-NEXT:    [[B3:%.*]] = ptrtoint float* [[B:%.*]] to i64
; AVX2-NEXT:    [[TRIGGER2:%.*]] = ptrtoint i32* [[TRIGGER:%.*]] to i64
; AVX2-NEXT:    [[A1:%.*]] = ptrtoint float* [[A:%.*]] to i64
; AVX2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX2:       vector.memcheck:
; AVX2-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]]
; AVX2-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128
; AVX2-NEXT:    [[TMP1:%.*]] = sub i64 [[A1]], [[B3]]
; AVX2-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 128
; AVX2-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; AVX2-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; AVX2:       vector.ph:
; AVX2-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX2:       vector.body:
; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
; AVX2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 8
; AVX2-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 16
; AVX2-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 24
; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP4]]
; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP5]]
; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
; AVX2-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4
; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 8
; AVX2-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4
; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 16
; AVX2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4
; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 24
; AVX2-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <8 x i32>*
; AVX2-NEXT:    [[WIDE_LOAD7:%.*]] = load <8 x i32>, <8 x i32>* [[TMP17]], align 4
; AVX2-NEXT:    [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP20:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP21:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
; AVX2-NEXT:    [[TMP22:%.*]] = getelementptr float, float* [[B]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr float, float* [[B]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP24:%.*]] = getelementptr float, float* [[B]], i64 [[TMP4]]
; AVX2-NEXT:    [[TMP25:%.*]] = getelementptr float, float* [[B]], i64 [[TMP5]]
; AVX2-NEXT:    [[TMP26:%.*]] = getelementptr float, float* [[TMP22]], i32 0
; AVX2-NEXT:    [[TMP27:%.*]] = bitcast float* [[TMP26]] to <8 x float>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP27]], i32 4, <8 x i1> [[TMP18]], <8 x float> poison)
; AVX2-NEXT:    [[TMP28:%.*]] = getelementptr float, float* [[TMP22]], i32 8
; AVX2-NEXT:    [[TMP29:%.*]] = bitcast float* [[TMP28]] to <8 x float>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP29]], i32 4, <8 x i1> [[TMP19]], <8 x float> poison)
; AVX2-NEXT:    [[TMP30:%.*]] = getelementptr float, float* [[TMP22]], i32 16
; AVX2-NEXT:    [[TMP31:%.*]] = bitcast float* [[TMP30]] to <8 x float>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP31]], i32 4, <8 x i1> [[TMP20]], <8 x float> poison)
; AVX2-NEXT:    [[TMP32:%.*]] = getelementptr float, float* [[TMP22]], i32 24
; AVX2-NEXT:    [[TMP33:%.*]] = bitcast float* [[TMP32]] to <8 x float>*
; AVX2-NEXT:    [[WIDE_MASKED_LOAD10:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP33]], i32 4, <8 x i1> [[TMP21]], <8 x float> poison)
; AVX2-NEXT:    [[TMP34:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
; AVX2-NEXT:    [[TMP35:%.*]] = sitofp <8 x i32> [[WIDE_LOAD5]] to <8 x float>
; AVX2-NEXT:    [[TMP36:%.*]] = sitofp <8 x i32> [[WIDE_LOAD6]] to <8 x float>
; AVX2-NEXT:    [[TMP37:%.*]] = sitofp <8 x i32> [[WIDE_LOAD7]] to <8 x float>
; AVX2-NEXT:    [[TMP38:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP34]]
; AVX2-NEXT:    [[TMP39:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD8]], [[TMP35]]
; AVX2-NEXT:    [[TMP40:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD9]], [[TMP36]]
; AVX2-NEXT:    [[TMP41:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD10]], [[TMP37]]
; AVX2-NEXT:    [[TMP42:%.*]] = getelementptr float, float* [[A]], i64 [[TMP2]]
; AVX2-NEXT:    [[TMP43:%.*]] = getelementptr float, float* [[A]], i64 [[TMP3]]
; AVX2-NEXT:    [[TMP44:%.*]] = getelementptr float, float* [[A]], i64 [[TMP4]]
; AVX2-NEXT:    [[TMP45:%.*]] = getelementptr float, float* [[A]], i64 [[TMP5]]
; AVX2-NEXT:    [[TMP46:%.*]] = getelementptr float, float* [[TMP42]], i32 0
; AVX2-NEXT:    [[TMP47:%.*]] = bitcast float* [[TMP46]] to <8 x float>*
; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP38]], <8 x float>* [[TMP47]], i32 4, <8 x i1> [[TMP18]])
; AVX2-NEXT:    [[TMP48:%.*]] = getelementptr float, float* [[TMP42]], i32 8
; AVX2-NEXT:    [[TMP49:%.*]] = bitcast float* [[TMP48]] to <8 x float>*
; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP39]], <8 x float>* [[TMP49]], i32 4, <8 x i1> [[TMP19]])
; AVX2-NEXT:    [[TMP50:%.*]] = getelementptr float, float* [[TMP42]], i32 16
; AVX2-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP50]] to <8 x float>*
; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP40]], <8 x float>* [[TMP51]], i32 4, <8 x i1> [[TMP20]])
; AVX2-NEXT:    [[TMP52:%.*]] = getelementptr float, float* [[TMP42]], i32 24
; AVX2-NEXT:    [[TMP53:%.*]] = bitcast float* [[TMP52]] to <8 x float>*
; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP41]], <8 x float>* [[TMP53]], i32 4, <8 x i1> [[TMP21]])
; AVX2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AVX2-NEXT:    [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX2-NEXT:    br i1 [[TMP54]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; AVX2:       middle.block:
; AVX2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 9984
; AVX2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; AVX2:       scalar.ph:
; AVX2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; AVX2-NEXT:    br label [[FOR_BODY:%.*]]
; AVX2:       for.body:
; AVX2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    [[TMP55:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; AVX2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP55]], 100
; AVX2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; AVX2:       if.then:
; AVX2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    [[TMP56:%.*]] = load float, float* [[ARRAYIDX3]], align 4
; AVX2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP55]] to float
; AVX2-NEXT:    [[ADD:%.*]] = fadd float [[TMP56]], [[CONV]]
; AVX2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
; AVX2-NEXT:    store float [[ADD]], float* [[ARRAYIDX7]], align 4
; AVX2-NEXT:    br label [[FOR_INC]]
; AVX2:       for.inc:
; AVX2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; AVX2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; AVX2:       for.end:
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @foo2(
; AVX512-NEXT:  iter.check:
; AVX512-NEXT:    [[B3:%.*]] = ptrtoint float* [[B:%.*]] to i64
; AVX512-NEXT:    [[TRIGGER2:%.*]] = ptrtoint i32* [[TRIGGER:%.*]] to i64
; AVX512-NEXT:    [[A1:%.*]] = ptrtoint float* [[A:%.*]] to i64
; AVX512-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; AVX512:       vector.memcheck:
; AVX512-NEXT:    [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]]
; AVX512-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 256
; AVX512-NEXT:    [[TMP1:%.*]] = sub i64 [[A1]], [[B3]]
; AVX512-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 256
; AVX512-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; AVX512-NEXT:    br i1 [[CONFLICT_RDX]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; AVX512:       vector.main.loop.iter.check:
; AVX512-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX512:       vector.ph:
; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX512:       vector.body:
; AVX512-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
; AVX512-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 16
; AVX512-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 32
; AVX512-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 48
; AVX512-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
; AVX512-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
; AVX512-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP4]]
; AVX512-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP5]]
; AVX512-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
; AVX512-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP11]], align 4
; AVX512-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 16
; AVX512-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <16 x i32>*
; AVX512-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i32>, <16 x i32>* [[TMP13]], align 4
; AVX512-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 32
; AVX512-NEXT:    [[TMP15:%.*]] =
bitcast i32* [[TMP14]] to <16 x i32>* 905; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, <16 x i32>* [[TMP15]], align 4 906; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 48 907; AVX512-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <16 x i32>* 908; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, <16 x i32>* [[TMP17]], align 4 909; AVX512-NEXT: [[TMP18:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100> 910; AVX512-NEXT: [[TMP19:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100> 911; AVX512-NEXT: [[TMP20:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100> 912; AVX512-NEXT: [[TMP21:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100> 913; AVX512-NEXT: [[TMP22:%.*]] = getelementptr float, float* [[B]], i64 [[TMP2]] 914; AVX512-NEXT: [[TMP23:%.*]] = getelementptr float, float* [[B]], i64 [[TMP3]] 915; AVX512-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[B]], i64 [[TMP4]] 916; AVX512-NEXT: [[TMP25:%.*]] = getelementptr float, float* [[B]], i64 [[TMP5]] 917; AVX512-NEXT: [[TMP26:%.*]] = getelementptr float, float* [[TMP22]], i32 0 918; AVX512-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <16 x float>* 919; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP27]], i32 4, <16 x i1> [[TMP18]], <16 x float> poison) 920; AVX512-NEXT: [[TMP28:%.*]] = getelementptr float, float* [[TMP22]], i32 16 921; AVX512-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <16 x float>* 922; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP29]], i32 4, <16 x i1> [[TMP19]], <16 x float> poison) 923; AVX512-NEXT: [[TMP30:%.*]] = getelementptr float, float* [[TMP22]], i32 32 924; AVX512-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <16 x float>* 925; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP31]], i32 4, <16 x i1> [[TMP20]], <16 x float> poison) 926; AVX512-NEXT: [[TMP32:%.*]] = getelementptr float, float* [[TMP22]], i32 48 927; AVX512-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP32]] to <16 x float>* 928; AVX512-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* [[TMP33]], i32 4, <16 x i1> [[TMP21]], <16 x float> poison) 929; AVX512-NEXT: [[TMP34:%.*]] = sitofp <16 x i32> [[WIDE_LOAD]] to <16 x float> 930; AVX512-NEXT: [[TMP35:%.*]] = sitofp <16 x i32> [[WIDE_LOAD5]] to <16 x float> 931; AVX512-NEXT: [[TMP36:%.*]] = sitofp <16 x i32> [[WIDE_LOAD6]] to <16 x float> 932; AVX512-NEXT: [[TMP37:%.*]] = sitofp <16 x i32> [[WIDE_LOAD7]] to <16 x float> 933; AVX512-NEXT: [[TMP38:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD]], [[TMP34]] 934; AVX512-NEXT: [[TMP39:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD8]], [[TMP35]] 935; AVX512-NEXT: [[TMP40:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD9]], [[TMP36]] 936; AVX512-NEXT: [[TMP41:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD10]], [[TMP37]] 937; AVX512-NEXT: 
[[TMP42:%.*]] = getelementptr float, float* [[A]], i64 [[TMP2]] 938; AVX512-NEXT: [[TMP43:%.*]] = getelementptr float, float* [[A]], i64 [[TMP3]] 939; AVX512-NEXT: [[TMP44:%.*]] = getelementptr float, float* [[A]], i64 [[TMP4]] 940; AVX512-NEXT: [[TMP45:%.*]] = getelementptr float, float* [[A]], i64 [[TMP5]] 941; AVX512-NEXT: [[TMP46:%.*]] = getelementptr float, float* [[TMP42]], i32 0 942; AVX512-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP46]] to <16 x float>* 943; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> [[TMP38]], <16 x float>* [[TMP47]], i32 4, <16 x i1> [[TMP18]]) 944; AVX512-NEXT: [[TMP48:%.*]] = getelementptr float, float* [[TMP42]], i32 16 945; AVX512-NEXT: [[TMP49:%.*]] = bitcast float* [[TMP48]] to <16 x float>* 946; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> [[TMP39]], <16 x float>* [[TMP49]], i32 4, <16 x i1> [[TMP19]]) 947; AVX512-NEXT: [[TMP50:%.*]] = getelementptr float, float* [[TMP42]], i32 32 948; AVX512-NEXT: [[TMP51:%.*]] = bitcast float* [[TMP50]] to <16 x float>* 949; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> [[TMP40]], <16 x float>* [[TMP51]], i32 4, <16 x i1> [[TMP20]]) 950; AVX512-NEXT: [[TMP52:%.*]] = getelementptr float, float* [[TMP42]], i32 48 951; AVX512-NEXT: [[TMP53:%.*]] = bitcast float* [[TMP52]] to <16 x float>* 952; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> [[TMP41]], <16 x float>* [[TMP53]], i32 4, <16 x i1> [[TMP21]]) 953; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 954; AVX512-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 955; AVX512-NEXT: br i1 [[TMP54]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 956; AVX512: middle.block: 957; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 9984 958; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 959; AVX512: vec.epilog.iter.check: 960; AVX512-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] 961; AVX512: vec.epilog.ph: 962; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 963; AVX512-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] 964; AVX512: vec.epilog.vector.body: 965; AVX512-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] 966; AVX512-NEXT: [[TMP55:%.*]] = add i64 [[OFFSET_IDX]], 0 967; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP55]] 968; AVX512-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, i32* [[TMP56]], i32 0 969; AVX512-NEXT: [[TMP58:%.*]] = bitcast i32* [[TMP57]] to <8 x i32>* 970; AVX512-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x i32>, <8 x i32>* [[TMP58]], align 4 971; AVX512-NEXT: [[TMP59:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD13]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100> 972; AVX512-NEXT: [[TMP60:%.*]] = getelementptr float, float* [[B]], i64 [[TMP55]] 973; AVX512-NEXT: [[TMP61:%.*]] = getelementptr float, float* [[TMP60]], i32 0 974; AVX512-NEXT: [[TMP62:%.*]] = bitcast float* [[TMP61]] to <8 x float>* 975; AVX512-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP62]], i32 4, <8 x i1> [[TMP59]], <8 x float> poison) 976; AVX512-NEXT: [[TMP63:%.*]] = sitofp <8 x i32> [[WIDE_LOAD13]] to <8 x float> 977; AVX512-NEXT: [[TMP64:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD14]], [[TMP63]] 
978; AVX512-NEXT: [[TMP65:%.*]] = getelementptr float, float* [[A]], i64 [[TMP55]] 979; AVX512-NEXT: [[TMP66:%.*]] = getelementptr float, float* [[TMP65]], i32 0 980; AVX512-NEXT: [[TMP67:%.*]] = bitcast float* [[TMP66]] to <8 x float>* 981; AVX512-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP64]], <8 x float>* [[TMP67]], i32 4, <8 x i1> [[TMP59]]) 982; AVX512-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[OFFSET_IDX]], 8 983; AVX512-NEXT: [[TMP68:%.*]] = icmp eq i64 [[INDEX_NEXT15]], 10000 984; AVX512-NEXT: br i1 [[TMP68]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 985; AVX512: vec.epilog.middle.block: 986; AVX512-NEXT: [[CMP_N11:%.*]] = icmp eq i64 10000, 10000 987; AVX512-NEXT: br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] 988; AVX512: vec.epilog.scalar.ph: 989; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] 990; AVX512-NEXT: br label [[FOR_BODY:%.*]] 991; AVX512: for.body: 992; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 993; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] 994; AVX512-NEXT: [[TMP69:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 995; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP69]], 100 996; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 997; AVX512: if.then: 998; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] 999; AVX512-NEXT: [[TMP70:%.*]] = load float, float* [[ARRAYIDX3]], align 4 1000; AVX512-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP69]] to float 1001; AVX512-NEXT: [[ADD:%.*]] = fadd float [[TMP70]], [[CONV]] 1002; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1003; AVX512-NEXT: store float [[ADD]], float* [[ARRAYIDX7]], align 4 1004; AVX512-NEXT: br label [[FOR_INC]] 1005; AVX512: for.inc: 1006; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1007; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 1008; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1009; AVX512: for.end: 1010; AVX512-NEXT: ret void 1011; 1012entry: 1013 br label %for.body 1014 1015for.body: ; preds = %for.inc, %entry 1016 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 1017 %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv 1018 %0 = load i32, i32* %arrayidx, align 4 1019 %cmp1 = icmp slt i32 %0, 100 1020 br i1 %cmp1, label %if.then, label %for.inc 1021 1022if.then: ; preds = %for.body 1023 %arrayidx3 = getelementptr inbounds float, float* %B, i64 %indvars.iv 1024 %1 = load float, float* %arrayidx3, align 4 1025 %conv = sitofp i32 %0 to float 1026 %add = fadd float %1, %conv 1027 %arrayidx7 = getelementptr inbounds float, float* %A, i64 %indvars.iv 1028 store float %add, float* %arrayidx7, align 4 1029 br label %for.inc 1030 1031for.inc: ; preds = %for.body, %if.then 1032 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1033 %exitcond = icmp eq i64 %indvars.iv.next, 10000 1034 br i1 %exitcond, label %for.end, label %for.body 1035 1036for.end: ; preds = %for.inc 1037 ret void 1038} 1039 1040; The source code: 1041; 1042;void foo3(double *A, double *B, int *trigger) { 1043; 1044; for (int 
i=0; i<10000; i++) { 1045; if (trigger[i] < 100) { 1046; A[i] = B[i] + trigger[i]; 1047; } 1048; } 1049;} 1050 1051define void @foo3(double* nocapture %A, double* nocapture readonly %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 { 1052; AVX-LABEL: @foo3( 1053; AVX-NEXT: entry: 1054; AVX-NEXT: [[A1:%.*]] = bitcast double* [[A:%.*]] to i8* 1055; AVX-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* 1056; AVX-NEXT: [[B6:%.*]] = bitcast double* [[B:%.*]] to i8* 1057; AVX-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1058; AVX: vector.memcheck: 1059; AVX-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[A]], i64 10000 1060; AVX-NEXT: [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8* 1061; AVX-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000 1062; AVX-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* 1063; AVX-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[B]], i64 10000 1064; AVX-NEXT: [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8* 1065; AVX-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]] 1066; AVX-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]] 1067; AVX-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1068; AVX-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]] 1069; AVX-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]] 1070; AVX-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]] 1071; AVX-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] 1072; AVX-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1073; AVX: vector.ph: 1074; AVX-NEXT: br label [[VECTOR_BODY:%.*]] 1075; AVX: vector.body: 1076; AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1077; AVX-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1078; AVX-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 1079; AVX-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 1080; AVX-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 1081; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]] 1082; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]] 1083; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]] 1084; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]] 1085; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 1086; AVX-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 1087; AVX-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4, !alias.scope !7 1088; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 4 1089; AVX-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* 1090; AVX-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4, !alias.scope !7 1091; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 8 1092; AVX-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* 1093; AVX-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !7 1094; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 12 1095; AVX-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>* 1096; AVX-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15]], align 4, !alias.scope !7 1097; AVX-NEXT: [[TMP16:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100> 1098; AVX-NEXT: [[TMP17:%.*]] = icmp 
slt <4 x i32> [[WIDE_LOAD12]], <i32 100, i32 100, i32 100, i32 100> 1099; AVX-NEXT: [[TMP18:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD13]], <i32 100, i32 100, i32 100, i32 100> 1100; AVX-NEXT: [[TMP19:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD14]], <i32 100, i32 100, i32 100, i32 100> 1101; AVX-NEXT: [[TMP20:%.*]] = getelementptr double, double* [[B]], i64 [[TMP0]] 1102; AVX-NEXT: [[TMP21:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]] 1103; AVX-NEXT: [[TMP22:%.*]] = getelementptr double, double* [[B]], i64 [[TMP2]] 1104; AVX-NEXT: [[TMP23:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]] 1105; AVX-NEXT: [[TMP24:%.*]] = getelementptr double, double* [[TMP20]], i32 0 1106; AVX-NEXT: [[TMP25:%.*]] = bitcast double* [[TMP24]] to <4 x double>* 1107; AVX-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP25]], i32 8, <4 x i1> [[TMP16]], <4 x double> poison), !alias.scope !10 1108; AVX-NEXT: [[TMP26:%.*]] = getelementptr double, double* [[TMP20]], i32 4 1109; AVX-NEXT: [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x double>* 1110; AVX-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP27]], i32 8, <4 x i1> [[TMP17]], <4 x double> poison), !alias.scope !10 1111; AVX-NEXT: [[TMP28:%.*]] = getelementptr double, double* [[TMP20]], i32 8 1112; AVX-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <4 x double>* 1113; AVX-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP29]], i32 8, <4 x i1> [[TMP18]], <4 x double> poison), !alias.scope !10 1114; AVX-NEXT: [[TMP30:%.*]] = getelementptr double, double* [[TMP20]], i32 12 1115; AVX-NEXT: [[TMP31:%.*]] = bitcast double* [[TMP30]] to <4 x double>* 1116; AVX-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP31]], i32 8, <4 x i1> [[TMP19]], <4 x double> poison), !alias.scope !10 1117; AVX-NEXT: [[TMP32:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double> 1118; AVX-NEXT: [[TMP33:%.*]] = sitofp <4 x i32> [[WIDE_LOAD12]] to <4 x double> 1119; AVX-NEXT: [[TMP34:%.*]] = sitofp <4 x i32> [[WIDE_LOAD13]] to <4 x double> 1120; AVX-NEXT: [[TMP35:%.*]] = sitofp <4 x i32> [[WIDE_LOAD14]] to <4 x double> 1121; AVX-NEXT: [[TMP36:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], [[TMP32]] 1122; AVX-NEXT: [[TMP37:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD15]], [[TMP33]] 1123; AVX-NEXT: [[TMP38:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD16]], [[TMP34]] 1124; AVX-NEXT: [[TMP39:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD17]], [[TMP35]] 1125; AVX-NEXT: [[TMP40:%.*]] = getelementptr double, double* [[A]], i64 [[TMP0]] 1126; AVX-NEXT: [[TMP41:%.*]] = getelementptr double, double* [[A]], i64 [[TMP1]] 1127; AVX-NEXT: [[TMP42:%.*]] = getelementptr double, double* [[A]], i64 [[TMP2]] 1128; AVX-NEXT: [[TMP43:%.*]] = getelementptr double, double* [[A]], i64 [[TMP3]] 1129; AVX-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[TMP40]], i32 0 1130; AVX-NEXT: [[TMP45:%.*]] = bitcast double* [[TMP44]] to <4 x double>* 1131; AVX-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP36]], <4 x double>* [[TMP45]], i32 8, <4 x i1> [[TMP16]]), !alias.scope !12, !noalias !14 1132; AVX-NEXT: [[TMP46:%.*]] = getelementptr double, double* [[TMP40]], i32 4 1133; AVX-NEXT: [[TMP47:%.*]] = bitcast double* [[TMP46]] to <4 x double>* 1134; AVX-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP37]], <4 x double>* [[TMP47]], i32 8, <4 x i1> 
[[TMP17]]), !alias.scope !12, !noalias !14 1135; AVX-NEXT: [[TMP48:%.*]] = getelementptr double, double* [[TMP40]], i32 8 1136; AVX-NEXT: [[TMP49:%.*]] = bitcast double* [[TMP48]] to <4 x double>* 1137; AVX-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP38]], <4 x double>* [[TMP49]], i32 8, <4 x i1> [[TMP18]]), !alias.scope !12, !noalias !14 1138; AVX-NEXT: [[TMP50:%.*]] = getelementptr double, double* [[TMP40]], i32 12 1139; AVX-NEXT: [[TMP51:%.*]] = bitcast double* [[TMP50]] to <4 x double>* 1140; AVX-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP39]], <4 x double>* [[TMP51]], i32 8, <4 x i1> [[TMP19]]), !alias.scope !12, !noalias !14 1141; AVX-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1142; AVX-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 1143; AVX-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1144; AVX: middle.block: 1145; AVX-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 10000 1146; AVX-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1147; AVX: scalar.ph: 1148; AVX-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 1149; AVX-NEXT: br label [[FOR_BODY:%.*]] 1150; AVX: for.body: 1151; AVX-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1152; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] 1153; AVX-NEXT: [[TMP53:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1154; AVX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP53]], 100 1155; AVX-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1156; AVX: if.then: 1157; AVX-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV]] 1158; AVX-NEXT: [[TMP54:%.*]] = load double, double* [[ARRAYIDX3]], align 8 1159; AVX-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP53]] to double 1160; AVX-NEXT: [[ADD:%.*]] = fadd double [[TMP54]], [[CONV]] 1161; AVX-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]] 1162; AVX-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 1163; AVX-NEXT: br label [[FOR_INC]] 1164; AVX: for.inc: 1165; AVX-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1166; AVX-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 1167; AVX-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 1168; AVX: for.end: 1169; AVX-NEXT: ret void 1170; 1171; AVX512-LABEL: @foo3( 1172; AVX512-NEXT: entry: 1173; AVX512-NEXT: [[A1:%.*]] = bitcast double* [[A:%.*]] to i8* 1174; AVX512-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* 1175; AVX512-NEXT: [[B6:%.*]] = bitcast double* [[B:%.*]] to i8* 1176; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1177; AVX512: vector.memcheck: 1178; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[A]], i64 10000 1179; AVX512-NEXT: [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8* 1180; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000 1181; AVX512-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* 1182; AVX512-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[B]], i64 10000 1183; AVX512-NEXT: [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8* 1184; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]] 1185; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult i8* 
[[TRIGGER3]], [[SCEVGEP2]] 1186; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1187; AVX512-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]] 1188; AVX512-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]] 1189; AVX512-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]] 1190; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] 1191; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1192; AVX512: vector.ph: 1193; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 1194; AVX512: vector.body: 1195; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1196; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1197; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 1198; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 1199; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24 1200; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]] 1201; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]] 1202; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]] 1203; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]] 1204; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 1205; AVX512-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>* 1206; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP9]], align 4, !alias.scope !11 1207; AVX512-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 8 1208; AVX512-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>* 1209; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4, !alias.scope !11 1210; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16 1211; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>* 1212; AVX512-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !11 1213; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 24 1214; AVX512-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>* 1215; AVX512-NEXT: [[WIDE_LOAD14:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4, !alias.scope !11 1216; AVX512-NEXT: [[TMP16:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100> 1217; AVX512-NEXT: [[TMP17:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100> 1218; AVX512-NEXT: [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD13]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100> 1219; AVX512-NEXT: [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD14]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100> 1220; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, double* [[B]], i64 [[TMP0]] 1221; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]] 1222; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, double* [[B]], i64 [[TMP2]] 1223; AVX512-NEXT: [[TMP23:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]] 1224; AVX512-NEXT: [[TMP24:%.*]] = getelementptr double, double* [[TMP20]], i32 0 1225; AVX512-NEXT: [[TMP25:%.*]] = bitcast double* [[TMP24]] to <8 x double>* 1226; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP25]], i32 8, <8 x i1> 
[[TMP16]], <8 x double> poison), !alias.scope !14 1227; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double, double* [[TMP20]], i32 8 1228; AVX512-NEXT: [[TMP27:%.*]] = bitcast double* [[TMP26]] to <8 x double>* 1229; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP27]], i32 8, <8 x i1> [[TMP17]], <8 x double> poison), !alias.scope !14 1230; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, double* [[TMP20]], i32 16 1231; AVX512-NEXT: [[TMP29:%.*]] = bitcast double* [[TMP28]] to <8 x double>* 1232; AVX512-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP29]], i32 8, <8 x i1> [[TMP18]], <8 x double> poison), !alias.scope !14 1233; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, double* [[TMP20]], i32 24 1234; AVX512-NEXT: [[TMP31:%.*]] = bitcast double* [[TMP30]] to <8 x double>* 1235; AVX512-NEXT: [[WIDE_MASKED_LOAD17:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP31]], i32 8, <8 x i1> [[TMP19]], <8 x double> poison), !alias.scope !14 1236; AVX512-NEXT: [[TMP32:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x double> 1237; AVX512-NEXT: [[TMP33:%.*]] = sitofp <8 x i32> [[WIDE_LOAD12]] to <8 x double> 1238; AVX512-NEXT: [[TMP34:%.*]] = sitofp <8 x i32> [[WIDE_LOAD13]] to <8 x double> 1239; AVX512-NEXT: [[TMP35:%.*]] = sitofp <8 x i32> [[WIDE_LOAD14]] to <8 x double> 1240; AVX512-NEXT: [[TMP36:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD]], [[TMP32]] 1241; AVX512-NEXT: [[TMP37:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD15]], [[TMP33]] 1242; AVX512-NEXT: [[TMP38:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD16]], [[TMP34]] 1243; AVX512-NEXT: [[TMP39:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD17]], [[TMP35]] 1244; AVX512-NEXT: [[TMP40:%.*]] = getelementptr double, double* [[A]], i64 [[TMP0]] 1245; AVX512-NEXT: [[TMP41:%.*]] = getelementptr double, double* [[A]], i64 [[TMP1]] 1246; AVX512-NEXT: [[TMP42:%.*]] = getelementptr double, double* [[A]], i64 [[TMP2]] 1247; AVX512-NEXT: [[TMP43:%.*]] = getelementptr double, double* [[A]], i64 [[TMP3]] 1248; AVX512-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[TMP40]], i32 0 1249; AVX512-NEXT: [[TMP45:%.*]] = bitcast double* [[TMP44]] to <8 x double>* 1250; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[TMP36]], <8 x double>* [[TMP45]], i32 8, <8 x i1> [[TMP16]]), !alias.scope !16, !noalias !18 1251; AVX512-NEXT: [[TMP46:%.*]] = getelementptr double, double* [[TMP40]], i32 8 1252; AVX512-NEXT: [[TMP47:%.*]] = bitcast double* [[TMP46]] to <8 x double>* 1253; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[TMP37]], <8 x double>* [[TMP47]], i32 8, <8 x i1> [[TMP17]]), !alias.scope !16, !noalias !18 1254; AVX512-NEXT: [[TMP48:%.*]] = getelementptr double, double* [[TMP40]], i32 16 1255; AVX512-NEXT: [[TMP49:%.*]] = bitcast double* [[TMP48]] to <8 x double>* 1256; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[TMP38]], <8 x double>* [[TMP49]], i32 8, <8 x i1> [[TMP18]]), !alias.scope !16, !noalias !18 1257; AVX512-NEXT: [[TMP50:%.*]] = getelementptr double, double* [[TMP40]], i32 24 1258; AVX512-NEXT: [[TMP51:%.*]] = bitcast double* [[TMP50]] to <8 x double>* 1259; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[TMP39]], <8 x double>* [[TMP51]], i32 8, <8 x i1> [[TMP19]]), !alias.scope !16, !noalias !18 1260; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 1261; AVX512-NEXT: [[TMP52:%.*]] = icmp eq i64 
[[INDEX_NEXT]], 9984 1262; AVX512-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 1263; AVX512: middle.block: 1264; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 9984 1265; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1266; AVX512: scalar.ph: 1267; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 1268; AVX512-NEXT: br label [[FOR_BODY:%.*]] 1269; AVX512: for.body: 1270; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1271; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] 1272; AVX512-NEXT: [[TMP53:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1273; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP53]], 100 1274; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1275; AVX512: if.then: 1276; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV]] 1277; AVX512-NEXT: [[TMP54:%.*]] = load double, double* [[ARRAYIDX3]], align 8 1278; AVX512-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP53]] to double 1279; AVX512-NEXT: [[ADD:%.*]] = fadd double [[TMP54]], [[CONV]] 1280; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]] 1281; AVX512-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 1282; AVX512-NEXT: br label [[FOR_INC]] 1283; AVX512: for.inc: 1284; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1285; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 1286; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1287; AVX512: for.end: 1288; AVX512-NEXT: ret void 1289; 1290entry: 1291 br label %for.body 1292 1293for.body: ; preds = %for.inc, %entry 1294 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 1295 %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv 1296 %0 = load i32, i32* %arrayidx, align 4 1297 %cmp1 = icmp slt i32 %0, 100 1298 br i1 %cmp1, label %if.then, label %for.inc 1299 1300if.then: ; preds = %for.body 1301 %arrayidx3 = getelementptr inbounds double, double* %B, i64 %indvars.iv 1302 %1 = load double, double* %arrayidx3, align 8 1303 %conv = sitofp i32 %0 to double 1304 %add = fadd double %1, %conv 1305 %arrayidx7 = getelementptr inbounds double, double* %A, i64 %indvars.iv 1306 store double %add, double* %arrayidx7, align 8 1307 br label %for.inc 1308 1309for.inc: ; preds = %for.body, %if.then 1310 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1311 %exitcond = icmp eq i64 %indvars.iv.next, 10000 1312 br i1 %exitcond, label %for.end, label %for.body 1313 1314for.end: ; preds = %for.inc 1315 ret void 1316} 1317 1318; The source code: 1319; 1320;void foo4(double *A, double *B, int *trigger) { 1321; 1322; for (int i=0; i<10000; i += 16) { 1323; if (trigger[i] < 100) { 1324; A[i] = B[i*2] + trigger[i]; << non-consecutive access 1325; } 1326; } 1327;} 1328 1329define void @foo4(double* nocapture %A, double* nocapture readonly %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 { 1330; AVX-LABEL: @foo4( 1331; AVX-NEXT: entry: 1332; AVX-NEXT: br label [[FOR_BODY:%.*]] 1333; AVX: for.body: 1334; AVX-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1335; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32*
[[TRIGGER:%.*]], i64 [[INDVARS_IV]] 1336; AVX-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1337; AVX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], 100 1338; AVX-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1339; AVX: if.then: 1340; AVX-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 1341; AVX-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 [[TMP1]] 1342; AVX-NEXT: [[TMP2:%.*]] = load double, double* [[ARRAYIDX3]], align 8 1343; AVX-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double 1344; AVX-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], [[CONV]] 1345; AVX-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]] 1346; AVX-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 1347; AVX-NEXT: br label [[FOR_INC]] 1348; AVX: for.inc: 1349; AVX-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 1350; AVX-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 1351; AVX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] 1352; AVX: for.end: 1353; AVX-NEXT: ret void 1354; 1355; AVX512-LABEL: @foo4( 1356; AVX512-NEXT: entry: 1357; AVX512-NEXT: [[A1:%.*]] = bitcast double* [[A:%.*]] to i8* 1358; AVX512-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* 1359; AVX512-NEXT: [[B6:%.*]] = bitcast double* [[B:%.*]] to i8* 1360; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1361; AVX512: vector.memcheck: 1362; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[A]], i64 9985 1363; AVX512-NEXT: [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8* 1364; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 9985 1365; AVX512-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* 1366; AVX512-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[B]], i64 19969 1367; AVX512-NEXT: [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8* 1368; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]] 1369; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]] 1370; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1371; AVX512-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]] 1372; AVX512-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]] 1373; AVX512-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]] 1374; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] 1375; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1376; AVX512: vector.ph: 1377; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 1378; AVX512: vector.body: 1379; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1380; AVX512-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1381; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], <8 x i64> [[VEC_IND]] 1382; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP0]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef), !alias.scope !21 1383; AVX512-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100> 1384; AVX512-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 
1, i64 1, i64 1, i64 1, i64 1> 1385; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[B]], <8 x i64> [[TMP2]] 1386; AVX512-NEXT: [[WIDE_MASKED_GATHER12:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> [[TMP3]], i32 8, <8 x i1> [[TMP1]], <8 x double> undef), !alias.scope !24 1387; AVX512-NEXT: [[TMP4:%.*]] = sitofp <8 x i32> [[WIDE_MASKED_GATHER]] to <8 x double> 1388; AVX512-NEXT: [[TMP5:%.*]] = fadd <8 x double> [[WIDE_MASKED_GATHER12]], [[TMP4]] 1389; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[A]], <8 x i64> [[VEC_IND]] 1390; AVX512-NEXT: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> [[TMP5]], <8 x double*> [[TMP6]], i32 8, <8 x i1> [[TMP1]]), !alias.scope !26, !noalias !28 1391; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 1392; AVX512-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 128, i64 128, i64 128, i64 128, i64 128, i64 128, i64 128, i64 128> 1393; AVX512-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 624 1394; AVX512-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] 1395; AVX512: middle.block: 1396; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, 624 1397; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1398; AVX512: scalar.ph: 1399; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 1400; AVX512-NEXT: br label [[FOR_BODY:%.*]] 1401; AVX512: for.body: 1402; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1403; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] 1404; AVX512-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1405; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 100 1406; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1407; AVX512: if.then: 1408; AVX512-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 1409; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP9]] 1410; AVX512-NEXT: [[TMP10:%.*]] = load double, double* [[ARRAYIDX3]], align 8 1411; AVX512-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double 1412; AVX512-NEXT: [[ADD:%.*]] = fadd double [[TMP10]], [[CONV]] 1413; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]] 1414; AVX512-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 1415; AVX512-NEXT: br label [[FOR_INC]] 1416; AVX512: for.inc: 1417; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 1418; AVX512-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 1419; AVX512-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP30:![0-9]+]] 1420; AVX512: for.end: 1421; AVX512-NEXT: ret void 1422; 1423entry: 1424 br label %for.body 1425 1426for.body: ; preds = %entry, %for.inc 1427 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 1428 %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv 1429 %0 = load i32, i32* %arrayidx, align 4 1430 %cmp1 = icmp slt i32 %0, 100 1431 br i1 %cmp1, label %if.then, label %for.inc 1432 1433if.then: ; preds = %for.body 1434 %1 = shl nuw nsw i64 %indvars.iv, 1 1435 %arrayidx3 = getelementptr inbounds double, double* %B, i64 %1 1436 %2 = load double, double* %arrayidx3, align 8 1437 %conv = sitofp i32 %0 to double 1438 %add = fadd double %2, %conv 1439 
%arrayidx7 = getelementptr inbounds double, double* %A, i64 %indvars.iv 1440 store double %add, double* %arrayidx7, align 8 1441 br label %for.inc 1442 1443for.inc: ; preds = %for.body, %if.then 1444 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 16 1445 %cmp = icmp ult i64 %indvars.iv.next, 10000 1446 br i1 %cmp, label %for.body, label %for.end 1447 1448for.end: ; preds = %for.inc 1449 ret void 1450} 1451 1452@a = common global [1 x i32*] zeroinitializer, align 8 1453@c = common global i32* null, align 8 1454 1455; Reverse loop 1456;void foo6(double *in, double *out, unsigned size, int *trigger) { 1457; 1458; for (int i=SIZE-1; i>=0; i--) { 1459; if (trigger[i] > 0) { 1460; out[i] = in[i] + (double) 0.5; 1461; } 1462; } 1463;} 1464 1465define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %size, i32* nocapture readonly %trigger) local_unnamed_addr #0 { 1466; AVX1-LABEL: @foo6( 1467; AVX1-NEXT: entry: 1468; AVX1-NEXT: br label [[FOR_BODY:%.*]] 1469; AVX1: for.body: 1470; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 4095, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1471; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[INDVARS_IV]] 1472; AVX1-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1473; AVX1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0 1474; AVX1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1475; AVX1: if.then: 1476; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[IN:%.*]], i64 [[INDVARS_IV]] 1477; AVX1-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX3]], align 8 1478; AVX1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e-01 1479; AVX1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[INDVARS_IV]] 1480; AVX1-NEXT: store double [[ADD]], double* [[ARRAYIDX5]], align 8 1481; AVX1-NEXT: br label [[FOR_INC]] 1482; AVX1: for.inc: 1483; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 1484; AVX1-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 1485; AVX1-NEXT: br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY]] 1486; AVX1: for.end: 1487; AVX1-NEXT: ret void 1488; 1489; AVX2-LABEL: @foo6( 1490; AVX2-NEXT: entry: 1491; AVX2-NEXT: [[OUT1:%.*]] = bitcast double* [[OUT:%.*]] to i8* 1492; AVX2-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* 1493; AVX2-NEXT: [[IN6:%.*]] = bitcast double* [[IN:%.*]] to i8* 1494; AVX2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1495; AVX2: vector.memcheck: 1496; AVX2-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[OUT]], i64 4096 1497; AVX2-NEXT: [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8* 1498; AVX2-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 4096 1499; AVX2-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* 1500; AVX2-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[IN]], i64 4096 1501; AVX2-NEXT: [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8* 1502; AVX2-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP45]] 1503; AVX2-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]] 1504; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1505; AVX2-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP78]] 1506; AVX2-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[IN6]], [[SCEVGEP2]] 1507; AVX2-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]] 1508; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] 1509; 
AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1510; AVX2: vector.ph: 1511; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] 1512; AVX2: vector.body: 1513; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1514; AVX2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] 1515; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 1516; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -4 1517; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -8 1518; AVX2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -12 1519; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]] 1520; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]] 1521; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]] 1522; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]] 1523; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 1524; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -3 1525; AVX2-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* 1526; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !17 1527; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1528; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -4 1529; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -3 1530; AVX2-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* 1531; AVX2-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !17 1532; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD12]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1533; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8 1534; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -3 1535; AVX2-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* 1536; AVX2-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !17 1537; AVX2-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD14]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1538; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -12 1539; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -3 1540; AVX2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 1541; AVX2-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4, !alias.scope !17 1542; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD16]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1543; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer 1544; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt <4 x i32> [[REVERSE13]], zeroinitializer 1545; AVX2-NEXT: [[TMP22:%.*]] = icmp sgt <4 x i32> [[REVERSE15]], zeroinitializer 1546; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt <4 x i32> [[REVERSE17]], zeroinitializer 1547; AVX2-NEXT: [[TMP24:%.*]] = getelementptr double, double* [[IN]], i64 [[TMP0]] 1548; AVX2-NEXT: [[TMP25:%.*]] = getelementptr double, double* [[IN]], i64 [[TMP1]] 1549; AVX2-NEXT: [[TMP26:%.*]] = getelementptr double, double* [[IN]], i64 [[TMP2]] 1550; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, double* [[IN]], i64 [[TMP3]] 1551; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, 
double* [[TMP24]], i32 0 1552; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, double* [[TMP28]], i32 -3 1553; AVX2-NEXT: [[REVERSE18:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1554; AVX2-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>* 1555; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE18]], <4 x double> poison), !alias.scope !20 1556; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1557; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, double* [[TMP24]], i32 -4 1558; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double, double* [[TMP31]], i32 -3 1559; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1560; AVX2-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>* 1561; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope !20 1562; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1563; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double, double* [[TMP24]], i32 -8 1564; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, double* [[TMP34]], i32 -3 1565; AVX2-NEXT: [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1566; AVX2-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>* 1567; AVX2-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> poison), !alias.scope !20 1568; AVX2-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1569; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, double* [[TMP24]], i32 -12 1570; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, double* [[TMP37]], i32 -3 1571; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1572; AVX2-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>* 1573; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> poison), !alias.scope !20 1574; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1575; AVX2-NEXT: [[TMP40:%.*]] = fadd <4 x double> [[REVERSE19]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01> 1576; AVX2-NEXT: [[TMP41:%.*]] = fadd <4 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01> 1577; AVX2-NEXT: [[TMP42:%.*]] = fadd <4 x double> [[REVERSE25]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01> 1578; AVX2-NEXT: [[TMP43:%.*]] = fadd <4 x double> [[REVERSE28]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01> 1579; AVX2-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP0]] 1580; AVX2-NEXT: [[TMP45:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP1]] 1581; AVX2-NEXT: 
[[TMP46:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP2]] 1582; AVX2-NEXT: [[TMP47:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP3]] 1583; AVX2-NEXT: [[REVERSE29:%.*]] = shufflevector <4 x double> [[TMP40]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1584; AVX2-NEXT: [[TMP48:%.*]] = getelementptr double, double* [[TMP44]], i32 0 1585; AVX2-NEXT: [[TMP49:%.*]] = getelementptr double, double* [[TMP48]], i32 -3 1586; AVX2-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <4 x double>* 1587; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE29]], <4 x double>* [[TMP50]], i32 8, <4 x i1> [[REVERSE18]]), !alias.scope !22, !noalias !24 1588; AVX2-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x double> [[TMP41]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1589; AVX2-NEXT: [[TMP51:%.*]] = getelementptr double, double* [[TMP44]], i32 -4 1590; AVX2-NEXT: [[TMP52:%.*]] = getelementptr double, double* [[TMP51]], i32 -3 1591; AVX2-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <4 x double>* 1592; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE31]], <4 x double>* [[TMP53]], i32 8, <4 x i1> [[REVERSE20]]), !alias.scope !22, !noalias !24 1593; AVX2-NEXT: [[REVERSE33:%.*]] = shufflevector <4 x double> [[TMP42]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1594; AVX2-NEXT: [[TMP54:%.*]] = getelementptr double, double* [[TMP44]], i32 -8 1595; AVX2-NEXT: [[TMP55:%.*]] = getelementptr double, double* [[TMP54]], i32 -3 1596; AVX2-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <4 x double>* 1597; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE33]], <4 x double>* [[TMP56]], i32 8, <4 x i1> [[REVERSE23]]), !alias.scope !22, !noalias !24 1598; AVX2-NEXT: [[REVERSE35:%.*]] = shufflevector <4 x double> [[TMP43]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1599; AVX2-NEXT: [[TMP57:%.*]] = getelementptr double, double* [[TMP44]], i32 -12 1600; AVX2-NEXT: [[TMP58:%.*]] = getelementptr double, double* [[TMP57]], i32 -3 1601; AVX2-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>* 1602; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE35]], <4 x double>* [[TMP59]], i32 8, <4 x i1> [[REVERSE26]]), !alias.scope !22, !noalias !24 1603; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1604; AVX2-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 1605; AVX2-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] 1606; AVX2: middle.block: 1607; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096 1608; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1609; AVX2: scalar.ph: 1610; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[ENTRY:%.*]] ], [ 4095, [[VECTOR_MEMCHECK]] ] 1611; AVX2-NEXT: br label [[FOR_BODY:%.*]] 1612; AVX2: for.body: 1613; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1614; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] 1615; AVX2-NEXT: [[TMP61:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1616; AVX2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP61]], 0 1617; AVX2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1618; AVX2: if.then: 1619; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[INDVARS_IV]] 1620; AVX2-NEXT: [[TMP62:%.*]] 
= load double, double* [[ARRAYIDX3]], align 8 1621; AVX2-NEXT: [[ADD:%.*]] = fadd double [[TMP62]], 5.000000e-01 1622; AVX2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]] 1623; AVX2-NEXT: store double [[ADD]], double* [[ARRAYIDX5]], align 8 1624; AVX2-NEXT: br label [[FOR_INC]] 1625; AVX2: for.inc: 1626; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 1627; AVX2-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 1628; AVX2-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 1629; AVX2: for.end: 1630; AVX2-NEXT: ret void 1631; 1632; AVX512-LABEL: @foo6( 1633; AVX512-NEXT: entry: 1634; AVX512-NEXT: [[OUT1:%.*]] = bitcast double* [[OUT:%.*]] to i8* 1635; AVX512-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* 1636; AVX512-NEXT: [[IN6:%.*]] = bitcast double* [[IN:%.*]] to i8* 1637; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1638; AVX512: vector.memcheck: 1639; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[OUT]], i64 4096 1640; AVX512-NEXT: [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8* 1641; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 4096 1642; AVX512-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* 1643; AVX512-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[IN]], i64 4096 1644; AVX512-NEXT: [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8* 1645; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP45]] 1646; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]] 1647; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1648; AVX512-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP78]] 1649; AVX512-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[IN6]], [[SCEVGEP2]] 1650; AVX512-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]] 1651; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] 1652; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1653; AVX512: vector.ph: 1654; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 1655; AVX512: vector.body: 1656; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1657; AVX512-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]] 1658; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 1659; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -8 1660; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -16 1661; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -24 1662; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]] 1663; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]] 1664; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]] 1665; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]] 1666; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 1667; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -7 1668; AVX512-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>* 1669; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !alias.scope !31 1670; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1671; AVX512-NEXT: [[TMP11:%.*]] = getelementptr 
inbounds i32, i32* [[TMP4]], i32 -8 1672; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -7 1673; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>* 1674; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !31 1675; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD12]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1676; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -16 1677; AVX512-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -7 1678; AVX512-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <8 x i32>* 1679; AVX512-NEXT: [[WIDE_LOAD14:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 4, !alias.scope !31 1680; AVX512-NEXT: [[REVERSE15:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD14]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1681; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -24 1682; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -7 1683; AVX512-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <8 x i32>* 1684; AVX512-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP19]], align 4, !alias.scope !31 1685; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD16]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1686; AVX512-NEXT: [[TMP20:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer 1687; AVX512-NEXT: [[TMP21:%.*]] = icmp sgt <8 x i32> [[REVERSE13]], zeroinitializer 1688; AVX512-NEXT: [[TMP22:%.*]] = icmp sgt <8 x i32> [[REVERSE15]], zeroinitializer 1689; AVX512-NEXT: [[TMP23:%.*]] = icmp sgt <8 x i32> [[REVERSE17]], zeroinitializer 1690; AVX512-NEXT: [[TMP24:%.*]] = getelementptr double, double* [[IN]], i64 [[TMP0]] 1691; AVX512-NEXT: [[TMP25:%.*]] = getelementptr double, double* [[IN]], i64 [[TMP1]] 1692; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double, double* [[IN]], i64 [[TMP2]] 1693; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, double* [[IN]], i64 [[TMP3]] 1694; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, double* [[TMP24]], i32 0 1695; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, double* [[TMP28]], i32 -7 1696; AVX512-NEXT: [[REVERSE18:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1697; AVX512-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <8 x double>* 1698; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP30]], i32 8, <8 x i1> [[REVERSE18]], <8 x double> poison), !alias.scope !34 1699; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1700; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, double* [[TMP24]], i32 -8 1701; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double, double* [[TMP31]], i32 -7 1702; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP21]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1703; AVX512-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <8 x double>* 1704; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP33]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope !34 1705; AVX512-NEXT: 
[[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1706; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, double* [[TMP24]], i32 -16 1707; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, double* [[TMP34]], i32 -7 1708; AVX512-NEXT: [[REVERSE23:%.*]] = shufflevector <8 x i1> [[TMP22]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1709; AVX512-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <8 x double>* 1710; AVX512-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP36]], i32 8, <8 x i1> [[REVERSE23]], <8 x double> poison), !alias.scope !34 1711; AVX512-NEXT: [[REVERSE25:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD24]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1712; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, double* [[TMP24]], i32 -24 1713; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, double* [[TMP37]], i32 -7 1714; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x i1> [[TMP23]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1715; AVX512-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <8 x double>* 1716; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP39]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> poison), !alias.scope !34 1717; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD27]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1718; AVX512-NEXT: [[TMP40:%.*]] = fadd <8 x double> [[REVERSE19]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01> 1719; AVX512-NEXT: [[TMP41:%.*]] = fadd <8 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01> 1720; AVX512-NEXT: [[TMP42:%.*]] = fadd <8 x double> [[REVERSE25]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01> 1721; AVX512-NEXT: [[TMP43:%.*]] = fadd <8 x double> [[REVERSE28]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01> 1722; AVX512-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP0]] 1723; AVX512-NEXT: [[TMP45:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP1]] 1724; AVX512-NEXT: [[TMP46:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP2]] 1725; AVX512-NEXT: [[TMP47:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP3]] 1726; AVX512-NEXT: [[REVERSE29:%.*]] = shufflevector <8 x double> [[TMP40]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1727; AVX512-NEXT: [[TMP48:%.*]] = getelementptr double, double* [[TMP44]], i32 0 1728; AVX512-NEXT: [[TMP49:%.*]] = getelementptr double, double* [[TMP48]], i32 -7 1729; AVX512-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <8 x double>* 1730; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE29]], <8 x double>* [[TMP50]], i32 8, <8 x i1> 
[[REVERSE18]]), !alias.scope !36, !noalias !38 1731; AVX512-NEXT: [[REVERSE31:%.*]] = shufflevector <8 x double> [[TMP41]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1732; AVX512-NEXT: [[TMP51:%.*]] = getelementptr double, double* [[TMP44]], i32 -8 1733; AVX512-NEXT: [[TMP52:%.*]] = getelementptr double, double* [[TMP51]], i32 -7 1734; AVX512-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <8 x double>* 1735; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE31]], <8 x double>* [[TMP53]], i32 8, <8 x i1> [[REVERSE20]]), !alias.scope !36, !noalias !38 1736; AVX512-NEXT: [[REVERSE33:%.*]] = shufflevector <8 x double> [[TMP42]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1737; AVX512-NEXT: [[TMP54:%.*]] = getelementptr double, double* [[TMP44]], i32 -16 1738; AVX512-NEXT: [[TMP55:%.*]] = getelementptr double, double* [[TMP54]], i32 -7 1739; AVX512-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <8 x double>* 1740; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE33]], <8 x double>* [[TMP56]], i32 8, <8 x i1> [[REVERSE23]]), !alias.scope !36, !noalias !38 1741; AVX512-NEXT: [[REVERSE35:%.*]] = shufflevector <8 x double> [[TMP43]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1742; AVX512-NEXT: [[TMP57:%.*]] = getelementptr double, double* [[TMP44]], i32 -24 1743; AVX512-NEXT: [[TMP58:%.*]] = getelementptr double, double* [[TMP57]], i32 -7 1744; AVX512-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <8 x double>* 1745; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE35]], <8 x double>* [[TMP59]], i32 8, <8 x i1> [[REVERSE26]]), !alias.scope !36, !noalias !38 1746; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 1747; AVX512-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 1748; AVX512-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] 1749; AVX512: middle.block: 1750; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096 1751; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1752; AVX512: scalar.ph: 1753; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[ENTRY:%.*]] ], [ 4095, [[VECTOR_MEMCHECK]] ] 1754; AVX512-NEXT: br label [[FOR_BODY:%.*]] 1755; AVX512: for.body: 1756; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1757; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] 1758; AVX512-NEXT: [[TMP61:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1759; AVX512-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP61]], 0 1760; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 1761; AVX512: if.then: 1762; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[INDVARS_IV]] 1763; AVX512-NEXT: [[TMP62:%.*]] = load double, double* [[ARRAYIDX3]], align 8 1764; AVX512-NEXT: [[ADD:%.*]] = fadd double [[TMP62]], 5.000000e-01 1765; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]] 1766; AVX512-NEXT: store double [[ADD]], double* [[ARRAYIDX5]], align 8 1767; AVX512-NEXT: br label [[FOR_INC]] 1768; AVX512: for.inc: 1769; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 1770; AVX512-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 1771; 
AVX512-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
; AVX512:       for.end:
; AVX512-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 4095, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp sgt i32 %0, 0
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx3 = getelementptr inbounds double, double* %in, i64 %indvars.iv
  %1 = load double, double* %arrayidx3, align 8
  %add = fadd double %1, 5.000000e-01
  %arrayidx5 = getelementptr inbounds double, double* %out, i64 %indvars.iv
  store double %add, double* %arrayidx5, align 8
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp eq i64 %indvars.iv, 0
  br i1 %cmp, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret void
}

; void foo7 (double * __restrict__ out, double ** __restrict__ in,
;            bool * __restrict__ trigger, unsigned size) {
;
;   for (unsigned i=0; i<size; i++)
;     if (trigger[i] && (in[i] != 0))
;       out[i] = (double) 0.5;
; }

define void @foo7(double* noalias nocapture %out, double** noalias nocapture readonly %in, i8* noalias nocapture readonly %trigger, i32 %size) local_unnamed_addr #0 {
; AVX1-LABEL: @foo7(
; AVX1-NEXT:  entry:
; AVX1-NEXT:    [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
; AVX1-NEXT:    br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; AVX1:       for.body.preheader:
; AVX1-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
; AVX1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
; AVX1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX1:       vector.ph:
; AVX1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
; AVX1-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; AVX1-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX1:       vector.body:
; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX1-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
; AVX1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
; AVX1-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 12
; AVX1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
; AVX1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
; AVX1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
; AVX1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
; AVX1-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
; AVX1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
; AVX1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
; AVX1-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
; AVX1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
; AVX1-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
; AVX1-NEXT:
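; The `&&` in the foo7 source short-circuits: in[i] may only be dereferenced
; where trigger[i] is true. In the checks this surfaces as an unmasked <4 x i8>
; load of trigger, an and/icmp/xor chain that materializes the first mask, a
; masked load of the in[] pointers under that mask, and a select that combines
; both masks before the masked store of 0.5. A rough hand-written sketch of one
; 4-wide step, using `icmp ne` in place of the equivalent icmp-eq-plus-xor the
; vectorizer emits (all %names here are hypothetical, not generated checks):
;
;   %t.bit = and <4 x i8> %t, <i8 1, i8 1, i8 1, i8 1>
;   %m1 = icmp ne <4 x i8> %t.bit, zeroinitializer
;   %ptrs = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* %in.vec, i32 8, <4 x i1> %m1, <4 x double*> poison)
;   %m2 = icmp ne <4 x double*> %ptrs, zeroinitializer
;   %m = select <4 x i1> %m1, <4 x i1> %m2, <4 x i1> zeroinitializer
;   call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* %out.vec, i32 8, <4 x i1> %m)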
[[WIDE_LOAD2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1 1842; AVX1-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12 1843; AVX1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>* 1844; AVX1-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1 1845; AVX1-NEXT: [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1> 1846; AVX1-NEXT: [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1> 1847; AVX1-NEXT: [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1> 1848; AVX1-NEXT: [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1> 1849; AVX1-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer 1850; AVX1-NEXT: [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer 1851; AVX1-NEXT: [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer 1852; AVX1-NEXT: [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer 1853; AVX1-NEXT: [[TMP24:%.*]] = getelementptr double*, double** [[IN:%.*]], i64 [[TMP0]] 1854; AVX1-NEXT: [[TMP25:%.*]] = getelementptr double*, double** [[IN]], i64 [[TMP1]] 1855; AVX1-NEXT: [[TMP26:%.*]] = getelementptr double*, double** [[IN]], i64 [[TMP2]] 1856; AVX1-NEXT: [[TMP27:%.*]] = getelementptr double*, double** [[IN]], i64 [[TMP3]] 1857; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true> 1858; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true> 1859; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true> 1860; AVX1-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true> 1861; AVX1-NEXT: [[TMP32:%.*]] = getelementptr double*, double** [[TMP24]], i32 0 1862; AVX1-NEXT: [[TMP33:%.*]] = bitcast double** [[TMP32]] to <4 x double*>* 1863; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> poison) 1864; AVX1-NEXT: [[TMP34:%.*]] = getelementptr double*, double** [[TMP24]], i32 4 1865; AVX1-NEXT: [[TMP35:%.*]] = bitcast double** [[TMP34]] to <4 x double*>* 1866; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> poison) 1867; AVX1-NEXT: [[TMP36:%.*]] = getelementptr double*, double** [[TMP24]], i32 8 1868; AVX1-NEXT: [[TMP37:%.*]] = bitcast double** [[TMP36]] to <4 x double*>* 1869; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> poison) 1870; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double*, double** [[TMP24]], i32 12 1871; AVX1-NEXT: [[TMP39:%.*]] = bitcast double** [[TMP38]] to <4 x double*>* 1872; AVX1-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> poison) 1873; AVX1-NEXT: [[TMP40:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer 1874; AVX1-NEXT: [[TMP41:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD4]], zeroinitializer 1875; AVX1-NEXT: [[TMP42:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD5]], zeroinitializer 1876; AVX1-NEXT: [[TMP43:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD6]], zeroinitializer 1877; AVX1-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[OUT:%.*]], i64 [[TMP0]] 1878; AVX1-NEXT: [[TMP45:%.*]] = getelementptr double, double* [[OUT]], i64 
[[TMP1]] 1879; AVX1-NEXT: [[TMP46:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP2]] 1880; AVX1-NEXT: [[TMP47:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP3]] 1881; AVX1-NEXT: [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true> 1882; AVX1-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true> 1883; AVX1-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true> 1884; AVX1-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true> 1885; AVX1-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer 1886; AVX1-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer 1887; AVX1-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer 1888; AVX1-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer 1889; AVX1-NEXT: [[TMP56:%.*]] = getelementptr double, double* [[TMP44]], i32 0 1890; AVX1-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>* 1891; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]]) 1892; AVX1-NEXT: [[TMP58:%.*]] = getelementptr double, double* [[TMP44]], i32 4 1893; AVX1-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>* 1894; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]]) 1895; AVX1-NEXT: [[TMP60:%.*]] = getelementptr double, double* [[TMP44]], i32 8 1896; AVX1-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>* 1897; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]]) 1898; AVX1-NEXT: [[TMP62:%.*]] = getelementptr double, double* [[TMP44]], i32 12 1899; AVX1-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>* 1900; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]]) 1901; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 1902; AVX1-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1903; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 1904; AVX1: middle.block: 1905; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 1906; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1907; AVX1: scalar.ph: 1908; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1909; AVX1-NEXT: br label [[FOR_BODY:%.*]] 1910; AVX1: for.body: 1911; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 1912; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]] 1913; AVX1-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 1914; AVX1-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1 1915; AVX1-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0 1916; AVX1-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label 
[[LAND_LHS_TRUE:%.*]] 1917; AVX1: land.lhs.true: 1918; AVX1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[INDVARS_IV]] 1919; AVX1-NEXT: [[TMP67:%.*]] = load double*, double** [[ARRAYIDX2]], align 8 1920; AVX1-NEXT: [[CMP3:%.*]] = icmp eq double* [[TMP67]], null 1921; AVX1-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]] 1922; AVX1: if.then: 1923; AVX1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]] 1924; AVX1-NEXT: store double 5.000000e-01, double* [[ARRAYIDX5]], align 8 1925; AVX1-NEXT: br label [[FOR_INC]] 1926; AVX1: for.inc: 1927; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1928; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 1929; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 1930; AVX1: for.end.loopexit: 1931; AVX1-NEXT: br label [[FOR_END]] 1932; AVX1: for.end: 1933; AVX1-NEXT: ret void 1934; 1935; AVX2-LABEL: @foo7( 1936; AVX2-NEXT: entry: 1937; AVX2-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 1938; AVX2-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] 1939; AVX2: for.body.preheader: 1940; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 1941; AVX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16 1942; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1943; AVX2: vector.ph: 1944; AVX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16 1945; AVX2-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 1946; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] 1947; AVX2: vector.body: 1948; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1949; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1950; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 1951; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 1952; AVX2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 1953; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]] 1954; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]] 1955; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]] 1956; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]] 1957; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0 1958; AVX2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>* 1959; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1 1960; AVX2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4 1961; AVX2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>* 1962; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1 1963; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8 1964; AVX2-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>* 1965; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1 1966; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12 1967; AVX2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>* 1968; AVX2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1 1969; AVX2-NEXT: [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1> 1970; AVX2-NEXT: [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1> 1971; AVX2-NEXT: [[TMP18:%.*]] = and <4 x i8> 
[[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1> 1972; AVX2-NEXT: [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1> 1973; AVX2-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer 1974; AVX2-NEXT: [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer 1975; AVX2-NEXT: [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer 1976; AVX2-NEXT: [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer 1977; AVX2-NEXT: [[TMP24:%.*]] = getelementptr double*, double** [[IN:%.*]], i64 [[TMP0]] 1978; AVX2-NEXT: [[TMP25:%.*]] = getelementptr double*, double** [[IN]], i64 [[TMP1]] 1979; AVX2-NEXT: [[TMP26:%.*]] = getelementptr double*, double** [[IN]], i64 [[TMP2]] 1980; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double*, double** [[IN]], i64 [[TMP3]] 1981; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true> 1982; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true> 1983; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true> 1984; AVX2-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true> 1985; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double*, double** [[TMP24]], i32 0 1986; AVX2-NEXT: [[TMP33:%.*]] = bitcast double** [[TMP32]] to <4 x double*>* 1987; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> poison) 1988; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double*, double** [[TMP24]], i32 4 1989; AVX2-NEXT: [[TMP35:%.*]] = bitcast double** [[TMP34]] to <4 x double*>* 1990; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> poison) 1991; AVX2-NEXT: [[TMP36:%.*]] = getelementptr double*, double** [[TMP24]], i32 8 1992; AVX2-NEXT: [[TMP37:%.*]] = bitcast double** [[TMP36]] to <4 x double*>* 1993; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> poison) 1994; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double*, double** [[TMP24]], i32 12 1995; AVX2-NEXT: [[TMP39:%.*]] = bitcast double** [[TMP38]] to <4 x double*>* 1996; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> poison) 1997; AVX2-NEXT: [[TMP40:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer 1998; AVX2-NEXT: [[TMP41:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD4]], zeroinitializer 1999; AVX2-NEXT: [[TMP42:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD5]], zeroinitializer 2000; AVX2-NEXT: [[TMP43:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD6]], zeroinitializer 2001; AVX2-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[OUT:%.*]], i64 [[TMP0]] 2002; AVX2-NEXT: [[TMP45:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP1]] 2003; AVX2-NEXT: [[TMP46:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP2]] 2004; AVX2-NEXT: [[TMP47:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP3]] 2005; AVX2-NEXT: [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true> 2006; AVX2-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true> 2007; AVX2-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true> 2008; AVX2-NEXT: [[TMP51:%.*]] = xor <4 x i1> 
[[TMP43]], <i1 true, i1 true, i1 true, i1 true> 2009; AVX2-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer 2010; AVX2-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer 2011; AVX2-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer 2012; AVX2-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer 2013; AVX2-NEXT: [[TMP56:%.*]] = getelementptr double, double* [[TMP44]], i32 0 2014; AVX2-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>* 2015; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]]) 2016; AVX2-NEXT: [[TMP58:%.*]] = getelementptr double, double* [[TMP44]], i32 4 2017; AVX2-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>* 2018; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]]) 2019; AVX2-NEXT: [[TMP60:%.*]] = getelementptr double, double* [[TMP44]], i32 8 2020; AVX2-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>* 2021; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]]) 2022; AVX2-NEXT: [[TMP62:%.*]] = getelementptr double, double* [[TMP44]], i32 12 2023; AVX2-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>* 2024; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]]) 2025; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 2026; AVX2-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2027; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 2028; AVX2: middle.block: 2029; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 2030; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 2031; AVX2: scalar.ph: 2032; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 2033; AVX2-NEXT: br label [[FOR_BODY:%.*]] 2034; AVX2: for.body: 2035; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 2036; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]] 2037; AVX2-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 2038; AVX2-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1 2039; AVX2-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0 2040; AVX2-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]] 2041; AVX2: land.lhs.true: 2042; AVX2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[INDVARS_IV]] 2043; AVX2-NEXT: [[TMP67:%.*]] = load double*, double** [[ARRAYIDX2]], align 8 2044; AVX2-NEXT: [[CMP3:%.*]] = icmp eq double* [[TMP67]], null 2045; AVX2-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]] 2046; AVX2: if.then: 2047; AVX2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]] 2048; AVX2-NEXT: 
store double 5.000000e-01, double* [[ARRAYIDX5]], align 8 2049; AVX2-NEXT: br label [[FOR_INC]] 2050; AVX2: for.inc: 2051; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 2052; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 2053; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 2054; AVX2: for.end.loopexit: 2055; AVX2-NEXT: br label [[FOR_END]] 2056; AVX2: for.end: 2057; AVX2-NEXT: ret void 2058; 2059; AVX512-LABEL: @foo7( 2060; AVX512-NEXT: entry: 2061; AVX512-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0 2062; AVX512-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] 2063; AVX512: for.body.preheader: 2064; AVX512-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 2065; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 2066; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2067; AVX512: vector.ph: 2068; AVX512-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32 2069; AVX512-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 2070; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] 2071; AVX512: vector.body: 2072; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2073; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 2074; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 2075; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 2076; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24 2077; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]] 2078; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]] 2079; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]] 2080; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]] 2081; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0 2082; AVX512-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <8 x i8>* 2083; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP9]], align 1 2084; AVX512-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8 2085; AVX512-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <8 x i8>* 2086; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP11]], align 1 2087; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 16 2088; AVX512-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <8 x i8>* 2089; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP13]], align 1 2090; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 24 2091; AVX512-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <8 x i8>* 2092; AVX512-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, <8 x i8>* [[TMP15]], align 1 2093; AVX512-NEXT: [[TMP16:%.*]] = and <8 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 2094; AVX512-NEXT: [[TMP17:%.*]] = and <8 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 2095; AVX512-NEXT: [[TMP18:%.*]] = and <8 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 2096; AVX512-NEXT: [[TMP19:%.*]] = and <8 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 2097; AVX512-NEXT: [[TMP20:%.*]] = icmp eq <8 x i8> [[TMP16]], zeroinitializer 2098; AVX512-NEXT: [[TMP21:%.*]] = icmp eq <8 x i8> [[TMP17]], zeroinitializer 2099; AVX512-NEXT: [[TMP22:%.*]] = icmp eq <8 x i8> 
[[TMP18]], zeroinitializer 2100; AVX512-NEXT: [[TMP23:%.*]] = icmp eq <8 x i8> [[TMP19]], zeroinitializer 2101; AVX512-NEXT: [[TMP24:%.*]] = getelementptr double*, double** [[IN:%.*]], i64 [[TMP0]] 2102; AVX512-NEXT: [[TMP25:%.*]] = getelementptr double*, double** [[IN]], i64 [[TMP1]] 2103; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double*, double** [[IN]], i64 [[TMP2]] 2104; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double*, double** [[IN]], i64 [[TMP3]] 2105; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 2106; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 2107; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 2108; AVX512-NEXT: [[TMP31:%.*]] = xor <8 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 2109; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double*, double** [[TMP24]], i32 0 2110; AVX512-NEXT: [[TMP33:%.*]] = bitcast double** [[TMP32]] to <8 x double*>* 2111; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP33]], i32 8, <8 x i1> [[TMP28]], <8 x double*> poison) 2112; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double*, double** [[TMP24]], i32 8 2113; AVX512-NEXT: [[TMP35:%.*]] = bitcast double** [[TMP34]] to <8 x double*>* 2114; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP35]], i32 8, <8 x i1> [[TMP29]], <8 x double*> poison) 2115; AVX512-NEXT: [[TMP36:%.*]] = getelementptr double*, double** [[TMP24]], i32 16 2116; AVX512-NEXT: [[TMP37:%.*]] = bitcast double** [[TMP36]] to <8 x double*>* 2117; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP37]], i32 8, <8 x i1> [[TMP30]], <8 x double*> poison) 2118; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double*, double** [[TMP24]], i32 24 2119; AVX512-NEXT: [[TMP39:%.*]] = bitcast double** [[TMP38]] to <8 x double*>* 2120; AVX512-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>* [[TMP39]], i32 8, <8 x i1> [[TMP31]], <8 x double*> poison) 2121; AVX512-NEXT: [[TMP40:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer 2122; AVX512-NEXT: [[TMP41:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD4]], zeroinitializer 2123; AVX512-NEXT: [[TMP42:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD5]], zeroinitializer 2124; AVX512-NEXT: [[TMP43:%.*]] = icmp eq <8 x double*> [[WIDE_MASKED_LOAD6]], zeroinitializer 2125; AVX512-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[OUT:%.*]], i64 [[TMP0]] 2126; AVX512-NEXT: [[TMP45:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP1]] 2127; AVX512-NEXT: [[TMP46:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP2]] 2128; AVX512-NEXT: [[TMP47:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP3]] 2129; AVX512-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 2130; AVX512-NEXT: [[TMP49:%.*]] = xor <8 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 2131; AVX512-NEXT: [[TMP50:%.*]] = xor <8 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 2132; AVX512-NEXT: [[TMP51:%.*]] = xor <8 x i1> [[TMP43]], <i1 true, 
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 2133; AVX512-NEXT: [[TMP52:%.*]] = select <8 x i1> [[TMP28]], <8 x i1> [[TMP48]], <8 x i1> zeroinitializer 2134; AVX512-NEXT: [[TMP53:%.*]] = select <8 x i1> [[TMP29]], <8 x i1> [[TMP49]], <8 x i1> zeroinitializer 2135; AVX512-NEXT: [[TMP54:%.*]] = select <8 x i1> [[TMP30]], <8 x i1> [[TMP50]], <8 x i1> zeroinitializer 2136; AVX512-NEXT: [[TMP55:%.*]] = select <8 x i1> [[TMP31]], <8 x i1> [[TMP51]], <8 x i1> zeroinitializer 2137; AVX512-NEXT: [[TMP56:%.*]] = getelementptr double, double* [[TMP44]], i32 0 2138; AVX512-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <8 x double>* 2139; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP57]], i32 8, <8 x i1> [[TMP52]]) 2140; AVX512-NEXT: [[TMP58:%.*]] = getelementptr double, double* [[TMP44]], i32 8 2141; AVX512-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <8 x double>* 2142; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP59]], i32 8, <8 x i1> [[TMP53]]) 2143; AVX512-NEXT: [[TMP60:%.*]] = getelementptr double, double* [[TMP44]], i32 16 2144; AVX512-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <8 x double>* 2145; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP61]], i32 8, <8 x i1> [[TMP54]]) 2146; AVX512-NEXT: [[TMP62:%.*]] = getelementptr double, double* [[TMP44]], i32 24 2147; AVX512-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <8 x double>* 2148; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP63]], i32 8, <8 x i1> [[TMP55]]) 2149; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 2150; AVX512-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2151; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] 2152; AVX512: middle.block: 2153; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 2154; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 2155; AVX512: scalar.ph: 2156; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 2157; AVX512-NEXT: br label [[FOR_BODY:%.*]] 2158; AVX512: for.body: 2159; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 2160; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]] 2161; AVX512-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 2162; AVX512-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1 2163; AVX512-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0 2164; AVX512-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]] 2165; AVX512: land.lhs.true: 2166; AVX512-NEXT: [[ARRAYIDX2:%.*]] = 
getelementptr inbounds double*, double** [[IN]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP67:%.*]] = load double*, double** [[ARRAYIDX2]], align 8
; AVX512-NEXT:    [[CMP3:%.*]] = icmp eq double* [[TMP67]], null
; AVX512-NEXT:    br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; AVX512:       if.then:
; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
; AVX512-NEXT:    store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
; AVX512-NEXT:    br label [[FOR_INC]]
; AVX512:       for.inc:
; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; AVX512-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; AVX512:       for.end.loopexit:
; AVX512-NEXT:    br label [[FOR_END]]
; AVX512:       for.end:
; AVX512-NEXT:    ret void
;
entry:
  %cmp5 = icmp eq i32 %size, 0
  br i1 %cmp5, label %for.end, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %size to i64
  br label %for.body

for.body:                                         ; preds = %for.inc, %for.body.preheader
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i8, i8* %trigger, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx, align 1
  %1 = and i8 %0, 1
  %tobool = icmp eq i8 %1, 0
  br i1 %tobool, label %for.inc, label %land.lhs.true

land.lhs.true:                                    ; preds = %for.body
  %arrayidx2 = getelementptr inbounds double*, double** %in, i64 %indvars.iv
  %2 = load double*, double** %arrayidx2, align 8
  %cmp3 = icmp eq double* %2, null
  br i1 %cmp3, label %for.inc, label %if.then

if.then:                                          ; preds = %land.lhs.true
  %arrayidx5 = getelementptr inbounds double, double* %out, i64 %indvars.iv
  store double 5.000000e-01, double* %arrayidx5, align 8
  br label %for.inc

for.inc:                                          ; preds = %land.lhs.true, %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc, %entry
  ret void
}

;typedef int (*fp)();
;void foo8 (double* __restrict__ out, fp* __restrict__ in, bool * __restrict__ trigger, unsigned size) {
;
;  for (unsigned i=0; i<size; i++)
;    if (trigger[i] && (in[i] != 0))
;      out[i] = (double) 0.5;
;}

define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture readonly %in, i8* noalias nocapture readonly %trigger, i32 %size) local_unnamed_addr #0 {
; AVX1-LABEL: @foo8(
; AVX1-NEXT:  entry:
; AVX1-NEXT:    [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
; AVX1-NEXT:    br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; AVX1:       for.body.preheader:
; AVX1-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
; AVX1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
; AVX1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX1:       vector.ph:
; AVX1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
; AVX1-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; AVX1-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX1:       vector.body:
; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [
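; foo8 exercises the same guarded pattern as foo7, with the element type of the
; masked pointer load switched to a function pointer. Only the intrinsic's type
; mangling changes; a rough sketch of the differing step (%names hypothetical):
;
;   %fps = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* %in.vec, i32 8, <4 x i1> %m1, <4 x i32 ()*> poison)
;   %m2 = icmp ne <4 x i32 ()*> %fps, zeroinitializer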
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2242; AVX1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 2243; AVX1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 2244; AVX1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 2245; AVX1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 2246; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]] 2247; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]] 2248; AVX1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]] 2249; AVX1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]] 2250; AVX1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0 2251; AVX1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>* 2252; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1 2253; AVX1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4 2254; AVX1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>* 2255; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1 2256; AVX1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8 2257; AVX1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>* 2258; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1 2259; AVX1-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12 2260; AVX1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>* 2261; AVX1-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1 2262; AVX1-NEXT: [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1> 2263; AVX1-NEXT: [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1> 2264; AVX1-NEXT: [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1> 2265; AVX1-NEXT: [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1> 2266; AVX1-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer 2267; AVX1-NEXT: [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer 2268; AVX1-NEXT: [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer 2269; AVX1-NEXT: [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer 2270; AVX1-NEXT: [[TMP24:%.*]] = getelementptr i32 ()*, i32 ()** [[IN:%.*]], i64 [[TMP0]] 2271; AVX1-NEXT: [[TMP25:%.*]] = getelementptr i32 ()*, i32 ()** [[IN]], i64 [[TMP1]] 2272; AVX1-NEXT: [[TMP26:%.*]] = getelementptr i32 ()*, i32 ()** [[IN]], i64 [[TMP2]] 2273; AVX1-NEXT: [[TMP27:%.*]] = getelementptr i32 ()*, i32 ()** [[IN]], i64 [[TMP3]] 2274; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true> 2275; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true> 2276; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true> 2277; AVX1-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true> 2278; AVX1-NEXT: [[TMP32:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 0 2279; AVX1-NEXT: [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <4 x i32 ()*>* 2280; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> poison) 2281; AVX1-NEXT: [[TMP34:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 4 2282; AVX1-NEXT: [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <4 x i32 ()*>* 2283; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> 
[[TMP29]], <4 x i32 ()*> poison)
; AVX1-NEXT: [[TMP36:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 8
; AVX1-NEXT: [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <4 x i32 ()*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> poison)
; AVX1-NEXT: [[TMP38:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 12
; AVX1-NEXT: [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <4 x i32 ()*>*
; AVX1-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> poison)
; AVX1-NEXT: [[TMP40:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX1-NEXT: [[TMP41:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD4]], zeroinitializer
; AVX1-NEXT: [[TMP42:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD5]], zeroinitializer
; AVX1-NEXT: [[TMP43:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD6]], zeroinitializer
; AVX1-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[OUT:%.*]], i64 [[TMP0]]
; AVX1-NEXT: [[TMP45:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP1]]
; AVX1-NEXT: [[TMP46:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP2]]
; AVX1-NEXT: [[TMP47:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP3]]
; AVX1-NEXT: [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
; AVX1-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
; AVX1-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer
; AVX1-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer
; AVX1-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer
; AVX1-NEXT: [[TMP56:%.*]] = getelementptr double, double* [[TMP44]], i32 0
; AVX1-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
; AVX1-NEXT: [[TMP58:%.*]] = getelementptr double, double* [[TMP44]], i32 4
; AVX1-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
; AVX1-NEXT: [[TMP60:%.*]] = getelementptr double, double* [[TMP44]], i32 8
; AVX1-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
; AVX1-NEXT: [[TMP62:%.*]] = getelementptr double, double* [[TMP44]], i32 12
; AVX1-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; AVX1-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; AVX1: middle.block:
; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; AVX1: scalar.ph:
; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; AVX1-NEXT: br label [[FOR_BODY:%.*]]
; AVX1: for.body:
; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX1-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; AVX1-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1
; AVX1-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
; AVX1-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
; AVX1: land.lhs.true:
; AVX1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[INDVARS_IV]]
; AVX1-NEXT: [[TMP67:%.*]] = load i32 ()*, i32 ()** [[ARRAYIDX2]], align 8
; AVX1-NEXT: [[CMP3:%.*]] = icmp eq i32 ()* [[TMP67]], null
; AVX1-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; AVX1: if.then:
; AVX1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
; AVX1-NEXT: store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
; AVX1-NEXT: br label [[FOR_INC]]
; AVX1: for.inc:
; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; AVX1: for.end.loopexit:
; AVX1-NEXT: br label [[FOR_END]]
; AVX1: for.end:
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @foo8(
; AVX2-NEXT: entry:
; AVX2-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
; AVX2-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; AVX2: for.body.preheader:
; AVX2-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
; AVX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX2: vector.ph:
; AVX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
; AVX2-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; AVX2-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX2: vector.body:
; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
; AVX2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
; AVX2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
; AVX2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
; AVX2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
; AVX2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
; AVX2-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
; AVX2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
; AVX2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
; AVX2-NEXT: [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
; AVX2-NEXT: [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1>
; AVX2-NEXT: [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1>
; AVX2-NEXT: [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1>
; AVX2-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
; AVX2-NEXT: [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
; AVX2-NEXT: [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
; AVX2-NEXT: [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32 ()*, i32 ()** [[IN:%.*]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32 ()*, i32 ()** [[IN]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP26:%.*]] = getelementptr i32 ()*, i32 ()** [[IN]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP27:%.*]] = getelementptr i32 ()*, i32 ()** [[IN]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP32:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 0
; AVX2-NEXT: [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> poison)
; AVX2-NEXT: [[TMP34:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 4
; AVX2-NEXT: [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x i32 ()*> poison)
; AVX2-NEXT: [[TMP36:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 8
; AVX2-NEXT: [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> poison)
; AVX2-NEXT: [[TMP38:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 12
; AVX2-NEXT: [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <4 x i32 ()*>*
; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> poison)
; AVX2-NEXT: [[TMP40:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX2-NEXT: [[TMP41:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD4]], zeroinitializer
; AVX2-NEXT: [[TMP42:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD5]], zeroinitializer
; AVX2-NEXT: [[TMP43:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD6]], zeroinitializer
; AVX2-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[OUT:%.*]], i64 [[TMP0]]
; AVX2-NEXT: [[TMP45:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP1]]
; AVX2-NEXT: [[TMP46:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP2]]
; AVX2-NEXT: [[TMP47:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP3]]
; AVX2-NEXT: [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
; AVX2-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
; AVX2-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer
; AVX2-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer
; AVX2-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer
; AVX2-NEXT: [[TMP56:%.*]] = getelementptr double, double* [[TMP44]], i32 0
; AVX2-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
; AVX2-NEXT: [[TMP58:%.*]] = getelementptr double, double* [[TMP44]], i32 4
; AVX2-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
; AVX2-NEXT: [[TMP60:%.*]] = getelementptr double, double* [[TMP44]], i32 8
; AVX2-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
; AVX2-NEXT: [[TMP62:%.*]] = getelementptr double, double* [[TMP44]], i32 12
; AVX2-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; AVX2: middle.block:
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; AVX2: scalar.ph:
; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; AVX2-NEXT: br label [[FOR_BODY:%.*]]
; AVX2: for.body:
; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX2-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; AVX2-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1
; AVX2-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
; AVX2-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
; AVX2: land.lhs.true:
; AVX2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[INDVARS_IV]]
; AVX2-NEXT: [[TMP67:%.*]] = load i32 ()*, i32 ()** [[ARRAYIDX2]], align 8
; AVX2-NEXT: [[CMP3:%.*]] = icmp eq i32 ()* [[TMP67]], null
; AVX2-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; AVX2: if.then:
; AVX2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
; AVX2-NEXT: store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
; AVX2-NEXT: br label [[FOR_INC]]
; AVX2: for.inc:
; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; AVX2: for.end.loopexit:
; AVX2-NEXT: br label [[FOR_END]]
; AVX2: for.end:
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @foo8(
; AVX512-NEXT: entry:
; AVX512-NEXT: [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
; AVX512-NEXT: br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; AVX512: for.body.preheader:
; AVX512-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX512: vector.ph:
; AVX512-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32
; AVX512-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8
; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16
; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24
; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
; AVX512-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP9]], align 1
; AVX512-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
; AVX512-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP11]], align 1
; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 16
; AVX512-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP13]], align 1
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 24
; AVX512-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <8 x i8>*
; AVX512-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, <8 x i8>* [[TMP15]], align 1
; AVX512-NEXT: [[TMP16:%.*]] = and <8 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP17:%.*]] = and <8 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP18:%.*]] = and <8 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP19:%.*]] = and <8 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; AVX512-NEXT: [[TMP20:%.*]] = icmp eq <8 x i8> [[TMP16]], zeroinitializer
; AVX512-NEXT: [[TMP21:%.*]] = icmp eq <8 x i8> [[TMP17]], zeroinitializer
; AVX512-NEXT: [[TMP22:%.*]] = icmp eq <8 x i8> [[TMP18]], zeroinitializer
; AVX512-NEXT: [[TMP23:%.*]] = icmp eq <8 x i8> [[TMP19]], zeroinitializer
; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32 ()*, i32 ()** [[IN:%.*]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32 ()*, i32 ()** [[IN]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP26:%.*]] = getelementptr i32 ()*, i32 ()** [[IN]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP27:%.*]] = getelementptr i32 ()*, i32 ()** [[IN]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP31:%.*]] = xor <8 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP32:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 0
; AVX512-NEXT: [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP33]], i32 8, <8 x i1> [[TMP28]], <8 x i32 ()*> poison)
; AVX512-NEXT: [[TMP34:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 8
; AVX512-NEXT: [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP35]], i32 8, <8 x i1> [[TMP29]], <8 x i32 ()*> poison)
; AVX512-NEXT: [[TMP36:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 16
; AVX512-NEXT: [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP37]], i32 8, <8 x i1> [[TMP30]], <8 x i32 ()*> poison)
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr i32 ()*, i32 ()** [[TMP24]], i32 24
; AVX512-NEXT: [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <8 x i32 ()*>*
; AVX512-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* [[TMP39]], i32 8, <8 x i1> [[TMP31]], <8 x i32 ()*> poison)
; AVX512-NEXT: [[TMP40:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
; AVX512-NEXT: [[TMP41:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD4]], zeroinitializer
; AVX512-NEXT: [[TMP42:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD5]], zeroinitializer
; AVX512-NEXT: [[TMP43:%.*]] = icmp eq <8 x i32 ()*> [[WIDE_MASKED_LOAD6]], zeroinitializer
; AVX512-NEXT: [[TMP44:%.*]] = getelementptr double, double* [[OUT:%.*]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP45:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP1]]
; AVX512-NEXT: [[TMP46:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP2]]
; AVX512-NEXT: [[TMP47:%.*]] = getelementptr double, double* [[OUT]], i64 [[TMP3]]
; AVX512-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP49:%.*]] = xor <8 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP50:%.*]] = xor <8 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP51:%.*]] = xor <8 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; AVX512-NEXT: [[TMP52:%.*]] = select <8 x i1> [[TMP28]], <8 x i1> [[TMP48]], <8 x i1> zeroinitializer
; AVX512-NEXT: [[TMP53:%.*]] = select <8 x i1> [[TMP29]], <8 x i1> [[TMP49]], <8 x i1> zeroinitializer
; AVX512-NEXT: [[TMP54:%.*]] = select <8 x i1> [[TMP30]], <8 x i1> [[TMP50]], <8 x i1> zeroinitializer
; AVX512-NEXT: [[TMP55:%.*]] = select <8 x i1> [[TMP31]], <8 x i1> [[TMP51]], <8 x i1> zeroinitializer
; AVX512-NEXT: [[TMP56:%.*]] = getelementptr double, double* [[TMP44]], i32 0
; AVX512-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP57]], i32 8, <8 x i1> [[TMP52]])
; AVX512-NEXT: [[TMP58:%.*]] = getelementptr double, double* [[TMP44]], i32 8
; AVX512-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP59]], i32 8, <8 x i1> [[TMP53]])
; AVX512-NEXT: [[TMP60:%.*]] = getelementptr double, double* [[TMP44]], i32 16
; AVX512-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP61]], i32 8, <8 x i1> [[TMP54]])
; AVX512-NEXT: [[TMP62:%.*]] = getelementptr double, double* [[TMP44]], i32 24
; AVX512-NEXT: [[TMP63:%.*]] = bitcast double* [[TMP62]] to <8 x double>*
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP63]], i32 8, <8 x i1> [[TMP55]])
; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; AVX512: scalar.ph:
; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; AVX512-NEXT: br label [[FOR_BODY:%.*]]
; AVX512: for.body:
; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
; AVX512-NEXT: [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; AVX512-NEXT: [[TMP66:%.*]] = and i8 [[TMP65]], 1
; AVX512-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
; AVX512-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
; AVX512: land.lhs.true:
; AVX512-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[INDVARS_IV]]
; AVX512-NEXT: [[TMP67:%.*]] = load i32 ()*, i32 ()** [[ARRAYIDX2]], align 8
; AVX512-NEXT: [[CMP3:%.*]] = icmp eq i32 ()* [[TMP67]], null
; AVX512-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; AVX512: if.then:
; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
; AVX512-NEXT: store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
; AVX512-NEXT: br label [[FOR_INC]]
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; AVX512: for.end.loopexit:
; AVX512-NEXT: br label [[FOR_END]]
; AVX512: for.end:
; AVX512-NEXT: ret void
;
entry:
  %cmp5 = icmp eq i32 %size, 0
  br i1 %cmp5, label %for.end, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %size to i64
  br label %for.body

for.body:                                         ; preds = %for.inc, %for.body.preheader
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i8, i8* %trigger, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx, align 1
  %1 = and i8 %0, 1
  %tobool = icmp eq i8 %1, 0
  br i1 %tobool, label %for.inc, label %land.lhs.true

land.lhs.true:                                    ; preds = %for.body
  %arrayidx2 = getelementptr inbounds i32 ()*, i32 ()** %in, i64 %indvars.iv
  %2 = load i32 ()*, i32 ()** %arrayidx2, align 8
  %cmp3 = icmp eq i32 ()* %2, null
  br i1 %cmp3, label %for.inc, label %if.then

if.then:                                          ; preds = %land.lhs.true
  %arrayidx5 = getelementptr inbounds double, double* %out, i64 %indvars.iv
  store double 5.000000e-01, double* %arrayidx5, align 8
  br label %for.inc

for.inc:                                          ; preds = %land.lhs.true, %for.body, %if.then
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc, %entry
  ret void
}

attributes #0 = { norecurse nounwind }
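
; For readability, a C-level sketch of what @foo8 above computes, in the same
; style as the source-code comments earlier in this file. It is reconstructed
; from the IR (loop structure and parameter names come straight from the
; function); the exact C spelling is an inferred illustration, not the
; original source:
;
;void foo8(double *out, int (**in)(void), bool *trigger, unsigned size) {
;
;  for (unsigned i=0; i<size; i++) {
;    if (trigger[i] && (in[i] != 0)) {
;      out[i] = 0.5;
;    }
;  }
;}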