; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -basic-aa -loop-distribute -enable-loop-distribute -S -enable-mem-access-versioning=0 < %s | FileCheck %s

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; PredicatedScalarEvolution decides it needs to insert a bounds check
; not based on memory access.

define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %d, i32* noalias %e, i64 %N) {
; CHECK-LABEL: @f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A5:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT:    br label [[FOR_BODY_LVER_CHECK:%.*]]
; CHECK:       for.body.lver.check:
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; CHECK-NEXT:    [[TMP10:%.*]] = or i1 false, [[TMP9]]
; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; CHECK-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
; CHECK-NEXT:    [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]]
; CHECK-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
; CHECK:       for.body.ph.lver.orig:
; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
; CHECK:       for.body.lver.orig:
; CHECK-NEXT:    [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; CHECK-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; CHECK-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; CHECK-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
; CHECK-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LVER_ORIG]]
; CHECK-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXA_LVER_ORIG]], align 4
; CHECK-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
; CHECK-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXB_LVER_ORIG]], align 4
; CHECK-NEXT:    [[MULA_LVER_ORIG:%.*]] = mul i32 [[LOADB_LVER_ORIG]], [[LOADA_LVER_ORIG]]
; CHECK-NEXT:    [[ADD_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; CHECK-NEXT:    [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1
; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LVER_ORIG]]
; CHECK-NEXT:    store i32 [[MULA_LVER_ORIG]], i32* [[ARRAYIDXA_PLUS_4_LVER_ORIG]], align 4
; CHECK-NEXT:    [[ARRAYIDXD_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
; CHECK-NEXT:    [[LOADD_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXD_LVER_ORIG]], align 4
; CHECK-NEXT:    [[ARRAYIDXE_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
; CHECK-NEXT:    [[LOADE_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXE_LVER_ORIG]], align 4
; CHECK-NEXT:    [[MULC_LVER_ORIG:%.*]] = mul i32 [[LOADD_LVER_ORIG]], [[LOADE_LVER_ORIG]]
; CHECK-NEXT:    [[ARRAYIDXC_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
; CHECK-NEXT:    store i32 [[MULC_LVER_ORIG]], i32* [[ARRAYIDXC_LVER_ORIG]], align 4
; CHECK-NEXT:    [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[ADD_LVER_ORIG]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; CHECK:       for.body.ph.ldist1:
; CHECK-NEXT:    br label [[FOR_BODY_LDIST1:%.*]]
; CHECK:       for.body.ldist1:
; CHECK-NEXT:    [[IND_LDIST1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[ADD_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ]
; CHECK-NEXT:    [[IND1_LDIST1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[INC1_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ]
; CHECK-NEXT:    [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2
; CHECK-NEXT:    [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64
; CHECK-NEXT:    [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LDIST1]]
; CHECK-NEXT:    [[LOADA_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !0
; CHECK-NEXT:    [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[MUL_EXT_LDIST1]]
; CHECK-NEXT:    [[LOADB_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXB_LDIST1]], align 4
; CHECK-NEXT:    [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]]
; CHECK-NEXT:    [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
; CHECK-NEXT:    [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1
; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LDIST1]]
; CHECK-NEXT:    store i32 [[MULA_LDIST1]], i32* [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !3
; CHECK-NEXT:    [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]]
; CHECK:       for.body.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[IND1]], 2
; CHECK-NEXT:    [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64
; CHECK-NEXT:    [[ADD]] = add nuw nsw i64 [[IND]], 1
; CHECK-NEXT:    [[INC1]] = add i32 [[IND1]], 1
; CHECK-NEXT:    [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 [[MUL_EXT]]
; CHECK-NEXT:    [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4
; CHECK-NEXT:    [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E]], i64 [[MUL_EXT]]
; CHECK-NEXT:    [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4
; CHECK-NEXT:    [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]]
; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
; CHECK-NEXT:    store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END:%.*]]
; CHECK:       for.end.loopexit6:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64


  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %mul_ext
  %loadA = load i32, i32* %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %mul_ext
  %loadB = load i32, i32* %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  %add = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
  store i32 %mulA, i32* %arrayidxA_plus_4, align 4

  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %mul_ext
  %loadD = load i32, i32* %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %mul_ext
  %loadE = load i32, i32* %arrayidxE, align 4

  %mulC = mul i32 %loadD, %loadE

  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %mul_ext
  store i32 %mulC, i32* %arrayidxC, align 4

  %exitcond = icmp eq i64 %add, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

declare void @use64(i64)
@global_a = common local_unnamed_addr global [8192 x i32] zeroinitializer, align 16

; Same pattern as @f, but the "a" pointer is a constant offset (42 x i32) into
; the global array @global_a, so the versioning bound checks are emitted
; against constant GEP expressions rather than an SSA pointer.
define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i32* noalias %e, i64 %N) {
; CHECK-LABEL: @f_with_offset(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_BASE:%.*]] = getelementptr [8192 x i32], [8192 x i32]* @global_a, i32 0, i32 0
; CHECK-NEXT:    [[A_INTPTR:%.*]] = ptrtoint i32* [[A_BASE]] to i64
; CHECK-NEXT:    call void @use64(i64 [[A_INTPTR]])
; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, i32* [[A_BASE]], i32 42
; CHECK-NEXT:    br label [[FOR_BODY_LVER_CHECK:%.*]]
; CHECK:       for.body.lver.check:
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; CHECK-NEXT:    [[TMP10:%.*]] = or i1 false, [[TMP9]]
; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; CHECK-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP11]]
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
; CHECK-NEXT:    [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]]
; CHECK-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
; CHECK:       for.body.ph.lver.orig:
; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
; CHECK:       for.body.lver.orig:
; CHECK-NEXT:    [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; CHECK-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
; CHECK-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
; CHECK-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
; CHECK-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LVER_ORIG]]
; CHECK-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXA_LVER_ORIG]], align 4
; CHECK-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
; CHECK-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXB_LVER_ORIG]], align 4
; CHECK-NEXT:    [[MULA_LVER_ORIG:%.*]] = mul i32 [[LOADB_LVER_ORIG]], [[LOADA_LVER_ORIG]]
; CHECK-NEXT:    [[ADD_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
; CHECK-NEXT:    [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1
; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LVER_ORIG]]
; CHECK-NEXT:    store i32 [[MULA_LVER_ORIG]], i32* [[ARRAYIDXA_PLUS_4_LVER_ORIG]], align 4
; CHECK-NEXT:    [[ARRAYIDXD_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
; CHECK-NEXT:    [[LOADD_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXD_LVER_ORIG]], align 4
; CHECK-NEXT:    [[ARRAYIDXE_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
; CHECK-NEXT:    [[LOADE_LVER_ORIG:%.*]] = load i32, i32* [[ARRAYIDXE_LVER_ORIG]], align 4
; CHECK-NEXT:    [[MULC_LVER_ORIG:%.*]] = mul i32 [[LOADD_LVER_ORIG]], [[LOADE_LVER_ORIG]]
; CHECK-NEXT:    [[ARRAYIDXC_LVER_ORIG:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
; CHECK-NEXT:    store i32 [[MULC_LVER_ORIG]], i32* [[ARRAYIDXC_LVER_ORIG]], align 4
; CHECK-NEXT:    [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[ADD_LVER_ORIG]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
; CHECK:       for.body.ph.ldist1:
; CHECK-NEXT:    br label [[FOR_BODY_LDIST1:%.*]]
; CHECK:       for.body.ldist1:
; CHECK-NEXT:    [[IND_LDIST1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[ADD_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ]
; CHECK-NEXT:    [[IND1_LDIST1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[INC1_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ]
; CHECK-NEXT:    [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2
; CHECK-NEXT:    [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64
; CHECK-NEXT:    [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LDIST1]]
; CHECK-NEXT:    [[LOADA_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !5
; CHECK-NEXT:    [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[MUL_EXT_LDIST1]]
; CHECK-NEXT:    [[LOADB_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXB_LDIST1]], align 4
; CHECK-NEXT:    [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]]
; CHECK-NEXT:    [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
; CHECK-NEXT:    [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1
; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LDIST1]]
; CHECK-NEXT:    store i32 [[MULA_LDIST1]], i32* [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !8
; CHECK-NEXT:    [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]]
; CHECK:       for.body.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[IND1]], 2
; CHECK-NEXT:    [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64
; CHECK-NEXT:    [[ADD]] = add nuw nsw i64 [[IND]], 1
; CHECK-NEXT:    [[INC1]] = add i32 [[IND1]], 1
; CHECK-NEXT:    [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 [[MUL_EXT]]
; CHECK-NEXT:    [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4
; CHECK-NEXT:    [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E]], i64 [[MUL_EXT]]
; CHECK-NEXT:    [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4
; CHECK-NEXT:    [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]]
; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
; CHECK-NEXT:    store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END:%.*]]
; CHECK:       for.end.loopexit5:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %a_base = getelementptr [8192 x i32], [8192 x i32]* @global_a, i32 0, i32 0
  %a_intptr = ptrtoint i32* %a_base to i64
  call void @use64(i64 %a_intptr)
  %a = getelementptr i32, i32* %a_base, i32 42
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64


  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %mul_ext
  %loadA = load i32, i32* %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %mul_ext
  %loadB = load i32, i32* %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  %add = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
  store i32 %mulA, i32* %arrayidxA_plus_4, align 4

  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %mul_ext
  %loadD = load i32, i32* %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %mul_ext
  %loadE = load i32, i32* %arrayidxE, align 4

  %mulC = mul i32 %loadD, %loadE

  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %mul_ext
  store i32 %mulC, i32* %arrayidxC, align 4

  %exitcond = icmp eq i64 %add, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Can't add control dependency with convergent in loop body.
define void @f_with_convergent(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %d, i32* noalias %e, i64 %N) #1 {
; CHECK-LABEL: @f_with_convergent(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[IND1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[IND1]], 2
; CHECK-NEXT:    [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64
; CHECK-NEXT:    [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[MUL_EXT]]
; CHECK-NEXT:    [[LOADA:%.*]] = load i32, i32* [[ARRAYIDXA]], align 4
; CHECK-NEXT:    [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[MUL_EXT]]
; CHECK-NEXT:    [[LOADB:%.*]] = load i32, i32* [[ARRAYIDXB]], align 4
; CHECK-NEXT:    [[MULA:%.*]] = mul i32 [[LOADB]], [[LOADA]]
; CHECK-NEXT:    [[ADD]] = add nuw nsw i64 [[IND]], 1
; CHECK-NEXT:    [[INC1]] = add i32 [[IND1]], 1
; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD]]
; CHECK-NEXT:    store i32 [[MULA]], i32* [[ARRAYIDXA_PLUS_4]], align 4
; CHECK-NEXT:    [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 [[MUL_EXT]]
; CHECK-NEXT:    [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4
; CHECK-NEXT:    [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E:%.*]], i64 [[MUL_EXT]]
; CHECK-NEXT:    [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4
; CHECK-NEXT:    [[CONVERGENTD:%.*]] = call i32 @llvm.convergent(i32 [[LOADD]])
; CHECK-NEXT:    [[MULC:%.*]] = mul i32 [[CONVERGENTD]], [[LOADE]]
; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[MUL_EXT]]
; CHECK-NEXT:    store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64


  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %mul_ext
  %loadA = load i32, i32* %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %mul_ext
  %loadB = load i32, i32* %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  %add = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
  store i32 %mulA, i32* %arrayidxA_plus_4, align 4

  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %mul_ext
  %loadD = load i32, i32* %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %mul_ext
  %loadE = load i32, i32* %arrayidxE, align 4

  %convergentD = call i32 @llvm.convergent(i32 %loadD)
  %mulC = mul i32 %convergentD, %loadE

  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %mul_ext
  store i32 %mulC, i32* %arrayidxC, align 4

  %exitcond = icmp eq i64 %add, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

declare i32 @llvm.convergent(i32) #0

attributes #0 = { nounwind readnone convergent }
attributes #1 = { nounwind convergent }