; RUN: opt < %s -opaque-pointers -cache-line-size=32 -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck -check-prefix=SMALLER-CACHELINE %s
; RUN: opt < %s -opaque-pointers -cache-line-size=256 -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck -check-prefix=LARGER-CACHELINE %s

;; This test is similar to test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll,
;; except that it exercises the analysis with the cache line size option set
;; to different values.
;;
;; The SMALLER-CACHELINE prefix checks the run with -cache-line-size=32 and the
;; LARGER-CACHELINE prefix checks the run with -cache-line-size=256.

; Check IndexedReference::computeRefCost can handle type differences between
; Stride and TripCount

; SMALLER-CACHELINE: Loop 'for.cond' has cost = 256
; LARGER-CACHELINE: Loop 'for.cond' has cost = 32
%struct._Handleitem = type { ptr }

; Stores null to blocks[i] for i counting up from 1 while i < 1024 (unsigned).
; The induction variable is i32 but is zero-extended to i64 for the subscript.
define void @handle_to_ptr(ptr %blocks) {
; Preheader:
entry:
  br label %for.cond

; Loop:
for.cond:                                         ; preds = %for.body, %entry
  %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ]
  %cmp = icmp ult i32 %i.0, 1024
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %idxprom = zext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds ptr, ptr %blocks, i64 %idxprom
  store ptr null, ptr %arrayidx, align 8
  %inc = add nuw nsw i32 %i.0, 1
  br label %for.cond

; Exit blocks
for.end:                                          ; preds = %for.cond
  ret void
}

; Check IndexedReference::computeRefCost can handle type differences between
; Coeff and ElemSize.

; SMALLER-CACHELINE: Loop 'for.cond' has cost = 100000000
; SMALLER-CACHELINE: Loop 'for.cond1' has cost = 1000000
; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 120000
; LARGER-CACHELINE: Loop 'for.cond' has cost = 100000000
; LARGER-CACHELINE: Loop 'for.cond1' has cost = 1000000
; LARGER-CACHELINE: Loop 'for.cond5' has cost = 10000
@data = external dso_local global [2 x [4 x [18 x i32]]], align 1

; Three nested loops (i, j, k) store 7 to data[i][j][k]. The induction
; variables are i16 and are zero-extended to i32 for the subscripts. Each loop
; exits on an opaque i1 argument, so no exit condition depends on the
; induction variables.
define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) {
entry:
  br label %for.cond

for.cond:
  %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ]
  %idxprom = zext i16 %i.0 to i32
  br i1 %b2, label %for.end19, label %for.cond1

for.cond1:
  %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ]
  br i1 %b1, label %for.inc17, label %for.cond5.preheader

for.cond5.preheader:
  %idxprom10 = zext i16 %j.0 to i32
  br label %for.cond5

for.cond5:
  %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ]
  br i1 %b0, label %for.inc14, label %for.inc

for.inc:
  %idxprom12 = zext i16 %k.0 to i32
  %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12
  store i32 7, ptr %arrayidx13, align 1
  %inc = add nuw nsw i16 %k.0, 1
  br label %for.cond5

for.inc14:
  %inc15 = add nuw nsw i16 %j.0, 1
  br label %for.cond1

for.inc17:
  %inc18 = add nuw nsw i16 %i.0, 1
  br label %for.cond

for.end19:
  ret void
}

; Check IndexedReference::computeRefCost can handle negative stride

; SMALLER-CACHELINE: Loop 'for.neg.cond' has cost = 256
; LARGER-CACHELINE: Loop 'for.neg.cond' has cost = 32

; Stores null to blocks[i] for i counting down from 1023 while i > 0 (signed).
define void @handle_to_ptr_neg_stride(ptr %blocks) {
; Preheader:
entry:
  br label %for.neg.cond

; Loop:
for.neg.cond:                                     ; preds = %for.neg.body, %entry
  %i.0 = phi i32 [ 1023, %entry ], [ %dec, %for.neg.body ]
  %cmp = icmp sgt i32 %i.0, 0
  br i1 %cmp, label %for.neg.body, label %for.neg.end

for.neg.body:                                     ; preds = %for.neg.cond
  %idxprom = zext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds ptr, ptr %blocks, i64 %idxprom
  store ptr null, ptr %arrayidx, align 8
  %dec = add nsw i32 %i.0, -1
  br label %for.neg.cond

; Exit blocks
for.neg.end:                                      ; preds = %for.neg.cond
  ret void
}



;   for (int i = 40960; i > 0; i--)
;     B[i] = B[40960 - i];

; FIXME: Currently negative access functions are treated the same as positive
; access functions. When this is fixed this testcase should have a cost
; approximately 2x higher.

; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 10240
; LARGER-CACHELINE: Loop 'for.cond2' has cost = 1280
define void @Test2(ptr %B) {
entry:
  br label %for.cond2

for.cond2:                                        ; preds = %for.body, %entry
  %i.0 = phi i32 [ 40960, %entry ], [ %dec, %for.body ]
  %cmp = icmp sgt i32 %i.0, 0
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond2
  ; The load subscript (40960 - i) increases while the store subscript (i)
  ; decreases.
  %sub = sub nsw i32 40960, %i.0
  %idxprom = sext i32 %sub to i64
  %arrayidx = getelementptr inbounds double, ptr %B, i64 %idxprom
  %0 = load double, ptr %arrayidx, align 8
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds double, ptr %B, i64 %idxprom1
  store double %0, ptr %arrayidx2, align 8
  %dec = add nsw i32 %i.0, -1
  br label %for.cond2

for.end:                                          ; preds = %for.cond2
  ret void
}



;   for (i = 40960; i > 0; i--)
;     C[i] = C[i];

; SMALLER-CACHELINE: Loop 'for.cond3' has cost = 10240
; LARGER-CACHELINE: Loop 'for.cond3' has cost = 1280
define void @Test3(ptr %C) {
entry:
  br label %for.cond3

for.cond3:                                        ; preds = %for.body, %entry
  %i.0 = phi i32 [ 40960, %entry ], [ %dec, %for.body ]
  %cmp = icmp sgt i32 %i.0, 0
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond3
  ; The load and the store use the same subscript (i), which decreases.
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds ptr, ptr %C, i64 %idxprom
  %0 = load ptr, ptr %arrayidx, align 8
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds ptr, ptr %C, i64 %idxprom1
  store ptr %0, ptr %arrayidx2, align 8
  %dec = add nsw i32 %i.0, -1
  br label %for.cond3

for.end:                                          ; preds = %for.cond3
  ret void
}



;   for (i = 0; i < 40960; i++)
;     D[i] = D[i];

; SMALLER-CACHELINE: Loop 'for.cond4' has cost = 10240
; LARGER-CACHELINE: Loop 'for.cond4' has cost = 1280
define void @Test4(ptr %D) {
entry:
  br label %for.cond4

for.cond4:                                        ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 40960
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond4
  ; The load and the store use the same subscript (i), which increases.
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds ptr, ptr %D, i64 %idxprom
  %0 = load ptr, ptr %arrayidx, align 8
  %idxprom1 = sext i32 %i.0 to i64
  %arrayidx2 = getelementptr inbounds ptr, ptr %D, i64 %idxprom1
  store ptr %0, ptr %arrayidx2, align 8
  %inc = add nsw i32 %i.0, 1
  br label %for.cond4

for.end:                                          ; preds = %for.cond4
  ret void
}