1#!/usr/bin/env python3 2# SPDX-License-Identifier: BSD-3-Clause 3# Copyright(c) 2010-2014 Intel Corporation 4 5# The main logic behind running autotests in parallel 6 7import io 8import csv 9from multiprocessing import Pool, Queue 10import pexpect 11import re 12import subprocess 13import sys 14import time 15import glob 16import os 17 18# wait for prompt 19def wait_prompt(child): 20 try: 21 child.sendline() 22 result = child.expect(["RTE>>", pexpect.TIMEOUT, pexpect.EOF], 23 timeout=120) 24 except: 25 return False 26 if result == 0: 27 return True 28 else: 29 return False 30 31 32# get all valid NUMA nodes 33def get_numa_nodes(): 34 return [ 35 int( 36 re.match(r"node(\d+)", os.path.basename(node)) 37 .group(1) 38 ) 39 for node in glob.glob("/sys/devices/system/node/node*") 40 ] 41 42 43# find first (or any, really) CPU on a particular node, will be used to spread 44# processes around NUMA nodes to avoid exhausting memory on particular node 45def first_cpu_on_node(node_nr): 46 cpu_path = glob.glob("/sys/devices/system/node/node%d/cpu*" % node_nr) 47 r = re.compile(r"cpu(\d+)") 48 cpu_name = filter(None, 49 map(r.match, 50 map(os.path.basename, cpu_path) 51 ) 52 ) 53 return int(next(cpu_name).group(1)) 54 55pool_child = None # per-process child 56 57 58# we initialize each worker with a queue because we need per-pool unique 59# command-line arguments, but we cannot do different arguments in an initializer 60# because the API doesn't allow per-worker initializer arguments. so, instead, 61# we will initialize with a shared queue, and dequeue command-line arguments 62# from this queue 63def pool_init(queue, result_queue): 64 global pool_child 65 66 cmdline, prefix = queue.get() 67 start_time = time.time() 68 name = ("Start %s" % prefix) if prefix != "" else "Start" 69 70 # use default prefix if no prefix was specified 71 prefix_cmdline = "--file-prefix=%s" % prefix if prefix != "" else "" 72 73 # append prefix to cmdline 74 cmdline = "%s %s" % (cmdline, prefix_cmdline) 75 76 # prepare logging of init 77 startuplog = io.StringIO() 78 79 # run test app 80 try: 81 82 print("\n%s %s\n" % ("=" * 20, prefix), file=startuplog) 83 print("\ncmdline=%s" % cmdline, file=startuplog) 84 85 pool_child = pexpect.spawn(cmdline, logfile=startuplog, encoding='utf-8') 86 # wait for target to boot 87 if not wait_prompt(pool_child): 88 pool_child.close() 89 90 result = tuple((-1, 91 "Fail [No prompt]", 92 name, 93 time.time() - start_time, 94 startuplog.getvalue(), 95 None)) 96 pool_child = None 97 else: 98 result = tuple((0, 99 "Success", 100 name, 101 time.time() - start_time, 102 startuplog.getvalue(), 103 None)) 104 except: 105 result = tuple((-1, 106 "Fail [Can't run]", 107 name, 108 time.time() - start_time, 109 startuplog.getvalue(), 110 None)) 111 pool_child = None 112 113 result_queue.put(result) 114 115 116# run a test 117# each result tuple in results list consists of: 118# result value (0 or -1) 119# result string 120# test name 121# total test run time (double) 122# raw test log 123# test report (if not available, should be None) 124# 125# this function needs to be outside AutotestRunner class because otherwise Pool 126# won't work (or rather it will require quite a bit of effort to make it work). 127def run_test(target, test): 128 global pool_child 129 130 if pool_child is None: 131 return -1, "Fail [No test process]", test["Name"], 0, "", None 132 133 # create log buffer for each test 134 # in multiprocessing environment, the logging would be 135 # interleaved and will create a mess, hence the buffering 136 logfile = io.StringIO() 137 pool_child.logfile = logfile 138 139 # make a note when the test started 140 start_time = time.time() 141 142 try: 143 # print test name to log buffer 144 print("\n%s %s\n" % ("-" * 20, test["Name"]), file=logfile) 145 146 # run test function associated with the test 147 result = test["Func"](pool_child, test["Command"]) 148 149 # make a note when the test was finished 150 end_time = time.time() 151 152 log = logfile.getvalue() 153 154 # append test data to the result tuple 155 result += (test["Name"], end_time - start_time, log) 156 157 # call report function, if any defined, and supply it with 158 # target and complete log for test run 159 if test["Report"]: 160 report = test["Report"](target, log) 161 162 # append report to results tuple 163 result += (report,) 164 else: 165 # report is None 166 result += (None,) 167 except: 168 # make a note when the test crashed 169 end_time = time.time() 170 171 # mark test as failed 172 result = (-1, "Fail [Crash]", test["Name"], 173 end_time - start_time, logfile.getvalue(), None) 174 175 # return test results 176 return result 177 178 179# class representing an instance of autotests run 180class AutotestRunner: 181 cmdline = "" 182 parallel_test_groups = [] 183 non_parallel_test_groups = [] 184 logfile = None 185 csvwriter = None 186 target = "" 187 start = None 188 n_tests = 0 189 fails = 0 190 log_buffers = [] 191 blocklist = [] 192 allowlist = [] 193 194 def __init__(self, cmdline, target, blocklist, allowlist, n_processes): 195 self.cmdline = cmdline 196 self.target = target 197 self.blocklist = blocklist 198 self.allowlist = allowlist 199 self.skipped = [] 200 self.parallel_tests = [] 201 self.non_parallel_tests = [] 202 self.n_processes = n_processes 203 self.active_processes = 0 204 205 # parse the binary for available test commands 206 binary = cmdline.split()[0] 207 stripped = 'not stripped' not in \ 208 subprocess.check_output(['file', binary]).decode() 209 if not stripped: 210 symbols = subprocess.check_output(['nm', binary]).decode() 211 self.avail_cmds = re.findall('test_register_(\w+)', symbols) 212 else: 213 self.avail_cmds = None 214 215 # log file filename 216 logfile = "%s.log" % target 217 csvfile = "%s.csv" % target 218 219 self.logfile = open(logfile, "w") 220 csvfile = open(csvfile, "w") 221 self.csvwriter = csv.writer(csvfile) 222 223 # prepare results table 224 self.csvwriter.writerow(["test_name", "test_result", "result_str"]) 225 226 # set up cmdline string 227 def __get_cmdline(self, cpu_nr): 228 cmdline = ("taskset -c %i " % cpu_nr) + self.cmdline 229 230 return cmdline 231 232 def __process_result(self, result): 233 234 # unpack result tuple 235 test_result, result_str, test_name, \ 236 test_time, log, report = result 237 238 # get total run time 239 cur_time = time.time() 240 total_time = int(cur_time - self.start) 241 242 # print results, test run time and total time since start 243 result = ("%s:" % test_name).ljust(30) 244 result += result_str.ljust(29) 245 result += "[%02dm %02ds]" % (test_time / 60, test_time % 60) 246 247 # don't print out total time every line, it's the same anyway 248 print(result + "[%02dm %02ds]" % (total_time / 60, total_time % 60)) 249 250 # if test failed and it wasn't a "start" test 251 if test_result < 0: 252 self.fails += 1 253 254 # collect logs 255 self.log_buffers.append(log) 256 257 # create report if it exists 258 if report: 259 try: 260 f = open("%s_%s_report.rst" % 261 (self.target, test_name), "w") 262 except IOError: 263 print("Report for %s could not be created!" % test_name) 264 else: 265 with f: 266 f.write(report) 267 268 # write test result to CSV file 269 self.csvwriter.writerow([test_name, test_result, result_str]) 270 271 # this function checks individual test and decides if this test should be in 272 # the group by comparing it against allowlist/blocklist. it also checks if 273 # the test is compiled into the binary, and marks it as skipped if necessary 274 def __filter_test(self, test): 275 test_cmd = test["Command"] 276 test_id = test_cmd 277 278 # dump tests are specified in full e.g. "Dump_mempool" 279 if "_autotest" in test_id: 280 test_id = test_id[:-len("_autotest")] 281 282 # filter out blocked/allowed tests 283 if self.blocklist and test_id in self.blocklist: 284 return False 285 if self.allowlist and test_id not in self.allowlist: 286 return False 287 288 # if test wasn't compiled in, remove it as well 289 if self.avail_cmds and test_cmd not in self.avail_cmds: 290 result = 0, "Skipped [Not compiled]", test_id, 0, "", None 291 self.skipped.append(tuple(result)) 292 return False 293 294 return True 295 296 def __run_test_group(self, test_group, worker_cmdlines): 297 group_queue = Queue() 298 init_result_queue = Queue() 299 for proc, cmdline in enumerate(worker_cmdlines): 300 prefix = "test%i" % proc if len(worker_cmdlines) > 1 else "" 301 group_queue.put(tuple((cmdline, prefix))) 302 303 # create a pool of worker threads 304 # we will initialize child in the initializer, and we don't need to 305 # close the child because when the pool worker gets destroyed, child 306 # closes the process 307 pool = Pool(processes=len(worker_cmdlines), 308 initializer=pool_init, 309 initargs=(group_queue, init_result_queue)) 310 311 results = [] 312 313 # process all initialization results 314 for _ in range(len(worker_cmdlines)): 315 self.__process_result(init_result_queue.get()) 316 317 # run all tests asynchronously 318 for test in test_group: 319 result = pool.apply_async(run_test, (self.target, test)) 320 results.append(result) 321 322 # tell the pool to stop all processes once done 323 pool.close() 324 325 # iterate while we have group execution results to get 326 while len(results) > 0: 327 # iterate over a copy to be able to safely delete results 328 # this iterates over a list of group results 329 for async_result in results[:]: 330 # if the thread hasn't finished yet, continue 331 if not async_result.ready(): 332 continue 333 334 res = async_result.get() 335 336 self.__process_result(res) 337 338 # remove result from results list once we're done with it 339 results.remove(async_result) 340 341 # iterate over test groups and run tests associated with them 342 def run_all_tests(self): 343 # filter groups 344 self.parallel_tests = list( 345 filter(self.__filter_test, 346 self.parallel_tests) 347 ) 348 self.non_parallel_tests = list( 349 filter(self.__filter_test, 350 self.non_parallel_tests) 351 ) 352 353 parallel_cmdlines = [] 354 # FreeBSD doesn't have NUMA support 355 numa_nodes = get_numa_nodes() 356 if len(numa_nodes) > 0: 357 for proc in range(self.n_processes): 358 # spread cpu affinity between NUMA nodes to have less chance of 359 # running out of memory while running multiple test apps in 360 # parallel. to do that, alternate between NUMA nodes in a round 361 # robin fashion, and pick an arbitrary CPU from that node to 362 # taskset our execution to 363 numa_node = numa_nodes[self.active_processes % len(numa_nodes)] 364 cpu_nr = first_cpu_on_node(numa_node) 365 parallel_cmdlines += [self.__get_cmdline(cpu_nr)] 366 # increase number of active processes so that the next cmdline 367 # gets a different NUMA node 368 self.active_processes += 1 369 else: 370 parallel_cmdlines = [self.cmdline] * self.n_processes 371 372 print("Running tests with %d workers" % self.n_processes) 373 374 # create table header 375 print("") 376 print("Test name".ljust(30) + "Test result".ljust(29) + 377 "Test".center(9) + "Total".center(9)) 378 print("=" * 80) 379 380 if len(self.skipped): 381 print("Skipped autotests:") 382 383 # print out any skipped tests 384 for result in self.skipped: 385 # unpack result tuple 386 test_result, result_str, test_name, _, _, _ = result 387 self.csvwriter.writerow([test_name, test_result, result_str]) 388 389 t = ("%s:" % test_name).ljust(30) 390 t += result_str.ljust(29) 391 t += "[00m 00s]" 392 393 print(t) 394 395 # make a note of tests start time 396 self.start = time.time() 397 398 # whatever happens, try to save as much logs as possible 399 try: 400 if len(self.parallel_tests) > 0: 401 print("Parallel autotests:") 402 self.__run_test_group(self.parallel_tests, parallel_cmdlines) 403 404 if len(self.non_parallel_tests) > 0: 405 print("Non-parallel autotests:") 406 self.__run_test_group(self.non_parallel_tests, [self.cmdline]) 407 408 # get total run time 409 cur_time = time.time() 410 total_time = int(cur_time - self.start) 411 412 # print out summary 413 print("=" * 80) 414 print("Total run time: %02dm %02ds" % (total_time / 60, 415 total_time % 60)) 416 if self.fails != 0: 417 print("Number of failed tests: %s" % str(self.fails)) 418 419 # write summary to logfile 420 self.logfile.write("Summary\n") 421 self.logfile.write("Target: ".ljust(15) + "%s\n" % self.target) 422 self.logfile.write("Tests: ".ljust(15) + "%i\n" % self.n_tests) 423 self.logfile.write("Failed tests: ".ljust( 424 15) + "%i\n" % self.fails) 425 except: 426 print("Exception occurred") 427 print(sys.exc_info()) 428 self.fails = 1 429 430 # drop logs from all executions to a logfile 431 for buf in self.log_buffers: 432 self.logfile.write(buf.replace("\r", "")) 433 434 return self.fails 435