// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "benchmark_runner.h"

#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"

#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif

#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <utility>

#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"

namespace benchmark {

namespace internal {

MemoryManager* memory_manager = nullptr;

namespace {

static constexpr IterationCount kMaxIterations = 1000000000;

BenchmarkReporter::Run CreateRunReport(
    const benchmark::internal::BenchmarkInstance& b,
    const internal::ThreadManager::Result& results,
    IterationCount memory_iterations,
    const MemoryManager::Result* memory_result, double seconds,
    int64_t repetition_index, int64_t repeats) {
  // Create report about this benchmark run.
  BenchmarkReporter::Run report;

  report.run_name = b.name();
  report.family_index = b.family_index();
  report.per_family_instance_index = b.per_family_instance_index();
  report.error_occurred = results.has_error_;
  report.error_message = results.error_message_;
  report.report_label = results.report_label_;
  // This is the total iterations across all threads.
  report.iterations = results.iterations;
  report.time_unit = b.time_unit();
  report.threads = b.threads();
  report.repetition_index = repetition_index;
  report.repetitions = repeats;

  if (!report.error_occurred) {
    if (b.use_manual_time()) {
      report.real_accumulated_time = results.manual_time_used;
    } else {
      report.real_accumulated_time = results.real_time_used;
    }
    report.cpu_accumulated_time = results.cpu_time_used;
    report.complexity_n = results.complexity_n;
    report.complexity = b.complexity();
    report.complexity_lambda = b.complexity_lambda();
    report.statistics = &b.statistics();
    report.counters = results.counters;

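    // Memory metrics are only filled in when a MemoryManager is registered
    // and a separate memory probe run was performed (see DoOneRepetition()
    // below).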
    if (memory_iterations > 0) {
      assert(memory_result != nullptr);
      report.memory_result = memory_result;
      report.allocs_per_iter =
          memory_iterations ? static_cast<double>(memory_result->num_allocs) /
                                  memory_iterations
                            : 0;
    }

    internal::Finish(&report.counters, results.iterations, seconds,
                     b.threads());
  }
  return report;
}

// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
                 int thread_id, ThreadManager* manager,
                 PerfCountersMeasurement* perf_counters_measurement) {
  internal::ThreadTimer timer(
      b->measure_process_cpu_time()
          ? internal::ThreadTimer::CreateProcessCpuTime()
          : internal::ThreadTimer::Create());
  State st =
      b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
  BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
      << "Benchmark returned before State::KeepRunning() returned false!";
  {
    MutexLock l(manager->GetBenchmarkMutex());
    internal::ThreadManager::Result& results = manager->results;
    results.iterations += st.iterations();
    results.cpu_time_used += timer.cpu_time_used();
    results.real_time_used += timer.real_time_used();
    results.manual_time_used += timer.manual_time_used();
    results.complexity_n += st.complexity_length_n();
    internal::Increment(&results.counters, st.counters);
  }
  manager->NotifyThreadComplete();
}

}  // end namespace

BenchmarkRunner::BenchmarkRunner(
    const benchmark::internal::BenchmarkInstance& b_,
    BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
    : b(b_),
      reports_for_family(reports_for_family_),
      min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
      repeats(b.repetitions() != 0 ? b.repetitions()
                                   : FLAGS_benchmark_repetitions),
      has_explicit_iteration_count(b.iterations() != 0),
      pool(b.threads() - 1),
      iters(has_explicit_iteration_count ? b.iterations() : 1),
      perf_counters_measurement(
          PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
      perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
                                        ? &perf_counters_measurement
                                        : nullptr) {
  run_results.display_report_aggregates_only =
      (FLAGS_benchmark_report_aggregates_only ||
       FLAGS_benchmark_display_aggregates_only);
  run_results.file_report_aggregates_only =
      FLAGS_benchmark_report_aggregates_only;
  if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
    run_results.display_report_aggregates_only =
        (b.aggregation_report_mode() &
         internal::ARM_DisplayReportAggregatesOnly);
    run_results.file_report_aggregates_only =
        (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
    BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
             perf_counters_measurement.IsValid())
        << "Perf counters were requested but could not be set up.";
  }
}
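
// For example, a benchmark registered through the public API as
//   BENCHMARK(BM_Foo)->Threads(4)->Repetitions(3);
// reaches the constructor above with pool.size() == 3 (the main thread
// itself runs as thread 0) and repeats == 3; min_time falls back to
// FLAGS_benchmark_min_time because MinTime() was not set. (Illustrative
// sketch; BM_Foo is a hypothetical benchmark function.)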

BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
  BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";

  std::unique_ptr<internal::ThreadManager> manager;
  manager.reset(new internal::ThreadManager(b.threads()));

  // Run all but one thread in separate threads
  for (std::size_t ti = 0; ti < pool.size(); ++ti) {
    pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
                           manager.get(), perf_counters_measurement_ptr);
  }
  // And run one thread here directly.
  // (If we were asked to run just one thread, we don't create new threads.)
  // Yes, we need to do this here *after* we start the separate threads.
  RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);

  // The main thread has finished. Now let's wait for the other threads.
  manager->WaitForAllThreads();
  for (std::thread& thread : pool) thread.join();

  IterationResults i;
  // Acquire the measurements/counters from the manager, UNDER THE LOCK!
  {
    MutexLock l(manager->GetBenchmarkMutex());
    i.results = manager->results;
  }

  // And get rid of the manager.
  manager.reset();

  // Adjust real/manual time stats, since they were reported per thread:
  // each thread accumulated them independently, so dividing by the thread
  // count yields the average across threads.
  i.results.real_time_used /= b.threads();
  i.results.manual_time_used /= b.threads();
  // If we were measuring whole-process CPU usage, adjust the CPU time too.
  if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();

  BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
             << i.results.real_time_used << "\n";

  // By using KeepRunningBatch a benchmark can iterate more times than
  // requested, so take the iteration count from i.results.
  i.iters = i.results.iterations / b.threads();

  // Base decisions off of real time if requested by this benchmark.
  i.seconds = i.results.cpu_time_used;
  if (b.use_manual_time()) {
    i.seconds = i.results.manual_time_used;
  } else if (b.use_real_time()) {
    i.seconds = i.results.real_time_used;
  }

  return i;
}
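
// A worked example of the prediction below (illustrative numbers): with
// min_time = 0.5s, a probe run of i.iters = 1000 that took i.seconds = 0.01s
// gives multiplier = 0.5 * 1.4 / 0.01 = 70. But 0.01 / 0.5 = 2% is below the
// 10% significance threshold, so the expansion is capped at 10x and the next
// run uses 10000 iterations.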

IterationCount BenchmarkRunner::PredictNumItersNeeded(
    const IterationResults& i) const {
  // See by how much the iteration count should be increased.
  // Note: Avoid division by zero with max(seconds, 1ns).
  double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
  // If our last run was at least 10% of FLAGS_benchmark_min_time then we
  // use the multiplier directly.
  // Otherwise we use at most a 10x expansion.
  // NOTE: When the last run was at least 10% of the min time the max
  // expansion should be 14x.
  bool is_significant = (i.seconds / min_time) > 0.1;
  multiplier = is_significant ? multiplier : 10.0;

  // So what seems to be the sufficiently-large iteration count? Round up.
  const IterationCount max_next_iters = static_cast<IterationCount>(
      std::lround(std::max(multiplier * static_cast<double>(i.iters),
                           static_cast<double>(i.iters) + 1.0)));
  // But we do have *some* limits, though.
  const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);

  BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
  return next_iters;
}

bool BenchmarkRunner::ShouldReportIterationResults(
    const IterationResults& i) const {
  // Determine if this run should be reported:
  // either it has run for a sufficient amount of time,
  // or an error was reported.
  return i.results.has_error_ ||
         i.iters >= kMaxIterations ||  // Too many iterations already.
         i.seconds >= min_time ||      // The elapsed time is large enough.
         // CPU time is specified but the elapsed real time greatly exceeds
         // the minimum time. This stops e.g. a sleeping benchmark, whose CPU
         // time barely advances, from growing its iteration count forever.
         // Note that user-provided timers are exempt from this sanity check.
         ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
}

void BenchmarkRunner::DoOneRepetition() {
  assert(HasRepeatsRemaining() && "Already done all repetitions?");

  const bool is_the_first_repetition = num_repetitions_done == 0;
  IterationResults i;

  // We *may* be gradually increasing the length (iteration count)
  // of the benchmark until we decide the results are significant.
  // And once we do, we report those last results and exit.
  // Please do note that if there are repetitions, the iteration count
  // is *only* calculated for the *first* repetition, and the other
  // repetitions simply reuse that precomputed iteration count.
  for (;;) {
    b.Setup();
    i = DoNIterations();
    b.Teardown();

    // Do we consider the results to be significant?
    // If we are doing repetitions, and the first repetition was already done,
    // it has calculated the correct iteration count, so we have run that very
    // iteration count just now. No need to calculate anything. Just report.
    // Else, the normal rules apply.
    const bool results_are_significant = !is_the_first_repetition ||
                                         has_explicit_iteration_count ||
                                         ShouldReportIterationResults(i);

    if (results_are_significant) break;  // Good, let's report them!

    // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
    // iteration count, and run the benchmark again...

    iters = PredictNumItersNeeded(i);
    assert(iters > i.iters &&
           "if we did more iterations than we want to do the next time, "
           "then we should have accepted the current iteration run.");
  }

  // Oh, one last thing: we also need to produce the 'memory measurements'.
  MemoryManager::Result* memory_result = nullptr;
  IterationCount memory_iterations = 0;
  if (memory_manager != nullptr) {
    // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
    // optional so we don't have to own the Result here.
    // Can't do it now due to cxx03.
    memory_results.push_back(MemoryManager::Result());
    memory_result = &memory_results.back();
    // Only run a few iterations to reduce the impact of one-time
    // allocations in benchmarks that are not properly managed.
    memory_iterations = std::min<IterationCount>(16, iters);
    memory_manager->Start();
    std::unique_ptr<internal::ThreadManager> manager;
    manager.reset(new internal::ThreadManager(1));
    b.Setup();
    RunInThread(&b, memory_iterations, 0, manager.get(),
                perf_counters_measurement_ptr);
    manager->WaitForAllThreads();
    manager.reset();
    b.Teardown();

    BENCHMARK_DISABLE_DEPRECATED_WARNING
    memory_manager->Stop(memory_result);
    BENCHMARK_RESTORE_DEPRECATED_WARNING
  }

  // Ok, now actually report.
  BenchmarkReporter::Run report =
      CreateRunReport(b, i.results, memory_iterations, memory_result,
                      i.seconds, num_repetitions_done, repeats);

  if (reports_for_family) {
    ++reports_for_family->num_runs_done;
    if (!report.error_occurred) reports_for_family->Runs.push_back(report);
  }

  run_results.non_aggregates.push_back(report);

  ++num_repetitions_done;
}
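
// Note: GetResults() returns an rvalue reference and moves run_results out
// of the runner, so it must be called at most once, and only after all
// repetitions have completed.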
RunResults&& BenchmarkRunner::GetResults() {
  assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");

  // Calculate additional statistics over the repetitions of this instance.
  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);

  return std::move(run_results);
}

}  // end namespace internal

}  // end namespace benchmark