#!/usr/bin/env python3
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2010-2014 Intel Corporation

# The main logic behind running autotests in parallel

import io
import csv
from multiprocessing import Pool, Queue
import pexpect
import re
import subprocess
import sys
import time
import glob
import os

# wait for prompt
def wait_prompt(child):
    try:
        child.sendline()
        result = child.expect(["RTE>>", pexpect.TIMEOUT, pexpect.EOF],
                              timeout=120)
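        # expect() returns the index of the pattern that matched:
        # 0 is the "RTE>>" prompt, 1 is a timeout, 2 is EOF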
    except:
        return False
    if result == 0:
        return True
    else:
        return False


# get all valid NUMA nodes
def get_numa_nodes():
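    # each node shows up in sysfs as /sys/devices/system/node/node<N>,
    # e.g. node0 -> 0, node1 -> 1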
    return [
        int(
            re.match(r"node(\d+)", os.path.basename(node))
            .group(1)
        )
        for node in glob.glob("/sys/devices/system/node/node*")
    ]

# find the first (or any, really) CPU on a particular node; it will be used to
# spread processes around NUMA nodes to avoid exhausting memory on any one node
def first_cpu_on_node(node_nr):
    cpu_path = glob.glob("/sys/devices/system/node/node%d/cpu*" % node_nr)
    r = re.compile(r"cpu(\d+)")
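    # extract the CPU number from each entry, e.g. .../node0/cpu4 -> 4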
    cpu_name = filter(None,
            map(r.match,
                map(os.path.basename, cpu_path)
            )
    )
    return int(next(cpu_name).group(1))

pool_child = None  # per-process child
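# each pool worker process keeps its own pexpect-spawned test binary in this
# global; it is set up once in pool_init() and reused by every run_test() call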


# we initialize each worker with a queue because we need per-worker unique
# command-line arguments, but we cannot pass different arguments to the
# initializer because the Pool API doesn't allow per-worker initializer
# arguments. so, instead, we initialize with a shared queue and have each
# worker dequeue its command-line arguments from that queue
def pool_init(queue, result_queue):
    global pool_child

    cmdline, prefix = queue.get()
    start_time = time.time()
    name = ("Start %s" % prefix) if prefix != "" else "Start"

    # use default prefix if no prefix was specified
    prefix_cmdline = "--file-prefix=%s" % prefix if prefix != "" else ""

    # append prefix to cmdline
    cmdline = "%s %s" % (cmdline, prefix_cmdline)

    # prepare logging of init
    startuplog = io.StringIO()

    # run test app
    try:

        print("\n%s %s\n" % ("=" * 20, prefix), file=startuplog)
        print("\ncmdline=%s" % cmdline, file=startuplog)

        pool_child = pexpect.spawn(cmdline, logfile=startuplog, encoding='utf-8')
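        # the child's startup output is captured into startuplog via logfile=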
        # wait for target to boot
        if not wait_prompt(pool_child):
            pool_child.close()

            result = tuple((-1,
                            "Fail [No prompt]",
                            name,
                            time.time() - start_time,
                            startuplog.getvalue(),
                            None))
            pool_child = None
        else:
            result = tuple((0,
                            "Success",
                            name,
                            time.time() - start_time,
                            startuplog.getvalue(),
                            None))
    except:
        result = tuple((-1,
                        "Fail [Can't run]",
                        name,
                        time.time() - start_time,
                        startuplog.getvalue(),
                        None))
        pool_child = None

    result_queue.put(result)


# run a test
# each result tuple in results list consists of:
#   result value (0 or -1)
#   result string
#   test name
#   total test run time (double)
#   raw test log
#   test report (if not available, should be None)
#
# this function needs to be outside AutotestRunner class because otherwise Pool
# won't work (or rather it will require quite a bit of effort to make it work).
def run_test(target, test):
    global pool_child

    if pool_child is None:
        return -1, "Fail [No test process]", test["Name"], 0, "", None

    # create a log buffer for each test
    # in a multiprocessing environment, the logging would be
    # interleaved and would create a mess, hence the buffering
    logfile = io.StringIO()
    pool_child.logfile = logfile

    # make a note when the test started
    start_time = time.time()

    try:
        # print test name to log buffer
        print("\n%s %s\n" % ("-" * 20, test["Name"]), file=logfile)

        # run test function associated with the test
        result = test["Func"](pool_child, test["Command"])

        # make a note when the test was finished
        end_time = time.time()

        log = logfile.getvalue()

        # append test data to the result tuple
        result += (test["Name"], end_time - start_time, log)

        # call report function, if any defined, and supply it with
        # target and complete log for test run
        if test["Report"]:
            report = test["Report"](target, log)

            # append report to results tuple
            result += (report,)
        else:
            # report is None
            result += (None,)
    except:
        # make a note when the test crashed
        end_time = time.time()

        # mark test as failed
        result = (-1, "Fail [Crash]", test["Name"],
                  end_time - start_time, logfile.getvalue(), None)

    # return test results
    return result


# class representing a single run of autotests
class AutotestRunner:
    cmdline = ""
    parallel_test_groups = []
    non_parallel_test_groups = []
    logfile = None
    csvwriter = None
    target = ""
    start = None
    n_tests = 0
    fails = 0
    log_buffers = []
    blocklist = []
    allowlist = []

    def __init__(self, cmdline, target, blocklist, allowlist, n_processes):
        self.cmdline = cmdline
        self.target = target
        self.blocklist = blocklist
        self.allowlist = allowlist
        self.skipped = []
        self.parallel_tests = []
        self.non_parallel_tests = []
        self.n_processes = n_processes
        self.active_processes = 0

        # parse the binary for available test commands
        binary = cmdline.split()[0]
        stripped = 'not stripped' not in \
                   subprocess.check_output(['file', binary]).decode()
        if not stripped:
            symbols = subprocess.check_output(['nm', binary]).decode()
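            # registered test commands appear in the symbol table as
            # test_register_<command>, so strip the prefix to recover the
            # list of available commands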
            self.avail_cmds = re.findall(r'test_register_(\w+)', symbols)
        else:
            self.avail_cmds = None

        # log and csv file names
        logfile = "%s.log" % target
        csvfile = "%s.csv" % target

        self.logfile = open(logfile, "w")
        csvfile = open(csvfile, "w")
        self.csvwriter = csv.writer(csvfile)

        # prepare results table
        self.csvwriter.writerow(["test_name", "test_result", "result_str"])

    # set up cmdline string
    def __get_cmdline(self, cpu_nr):
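        # pin the test binary to a specific CPU core via taskset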
        cmdline = ("taskset -c %i " % cpu_nr) + self.cmdline

        return cmdline

    def __process_result(self, result):

        # unpack result tuple
        test_result, result_str, test_name, \
            test_time, log, report = result

        # get total run time
        cur_time = time.time()
        total_time = int(cur_time - self.start)

        # print results, test run time and total time since start
        result = ("%s:" % test_name).ljust(30)
        result += result_str.ljust(29)
        result += "[%02dm %02ds]" % (test_time / 60, test_time % 60)

        # also print the total time elapsed since the start of the run
        print(result + "[%02dm %02ds]" % (total_time / 60, total_time % 60))

        # if the test failed, count it towards the failure total
        if test_result < 0:
            self.fails += 1

        # collect logs
        self.log_buffers.append(log)

        # create report if it exists
        if report:
            try:
                f = open("%s_%s_report.rst" %
                         (self.target, test_name), "w")
            except IOError:
                print("Report for %s could not be created!" % test_name)
            else:
                with f:
                    f.write(report)

        # write test result to CSV file
        self.csvwriter.writerow([test_name, test_result, result_str])

    # this function checks an individual test and decides whether it should be
    # in the group by comparing it against the allowlist/blocklist. it also
    # checks if the test is compiled into the binary, and marks it as skipped
    # if necessary
    def __filter_test(self, test):
        test_cmd = test["Command"]
        test_id = test_cmd

        # dump tests are specified in full e.g. "Dump_mempool"
        if "_autotest" in test_id:
            test_id = test_id[:-len("_autotest")]
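        # e.g. "mempool_autotest" is compared against the lists as "mempool"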

        # filter out blocked/allowed tests
        if self.blocklist and test_id in self.blocklist:
            return False
        if self.allowlist and test_id not in self.allowlist:
            return False

        # if test wasn't compiled in, remove it as well
        if self.avail_cmds and test_cmd not in self.avail_cmds:
            result = 0, "Skipped [Not compiled]", test_id, 0, "", None
            self.skipped.append(tuple(result))
            return False

        return True

    def __run_test_group(self, test_group, worker_cmdlines):
        group_queue = Queue()
        init_result_queue = Queue()
        for proc, cmdline in enumerate(worker_cmdlines):
            prefix = "test%i" % proc if len(worker_cmdlines) > 1 else ""
            group_queue.put(tuple((cmdline, prefix)))
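        # each worker's pool_init() will pull exactly one (cmdline, prefix)
        # pair off this queue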

        # create a pool of worker processes
        # we will initialize the child in the initializer, and we don't need to
        # close the child explicitly because it is closed when the pool worker
        # process gets destroyed
        pool = Pool(processes=len(worker_cmdlines),
                    initializer=pool_init,
                    initargs=(group_queue, init_result_queue))

        results = []

        # process all initialization results
        for _ in range(len(worker_cmdlines)):
            self.__process_result(init_result_queue.get())

        # run all tests asynchronously
        for test in test_group:
            result = pool.apply_async(run_test, (self.target, test))
            results.append(result)

        # tell the pool to stop all processes once done
        pool.close()

        # iterate while we have group execution results to get
        while len(results) > 0:
            # iterate over a copy to be able to safely delete results
            # this iterates over a list of group results
            for async_result in results[:]:
                # if this test hasn't finished yet, continue
                if not async_result.ready():
                    continue

                res = async_result.get()

                self.__process_result(res)

                # remove result from results list once we're done with it
                results.remove(async_result)

    # iterate over test groups and run tests associated with them
    def run_all_tests(self):
        # filter groups
        self.parallel_tests = list(
            filter(self.__filter_test,
                   self.parallel_tests)
        )
        self.non_parallel_tests = list(
            filter(self.__filter_test,
                   self.non_parallel_tests)
        )

        parallel_cmdlines = []
        # FreeBSD doesn't have NUMA support
        numa_nodes = get_numa_nodes()
        if len(numa_nodes) > 0:
            for proc in range(self.n_processes):
                # spread cpu affinity between NUMA nodes to have less chance of
                # running out of memory while running multiple test apps in
                # parallel. to do that, alternate between NUMA nodes in a round
                # robin fashion, and pick an arbitrary CPU from that node to
                # taskset our execution to
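                # e.g. with two NUMA nodes, workers get pinned to nodes
                # 0, 1, 0, 1, ... in turn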
                numa_node = numa_nodes[self.active_processes % len(numa_nodes)]
                cpu_nr = first_cpu_on_node(numa_node)
                parallel_cmdlines += [self.__get_cmdline(cpu_nr)]
                # increase number of active processes so that the next cmdline
                # gets a different NUMA node
                self.active_processes += 1
        else:
            parallel_cmdlines = [self.cmdline] * self.n_processes

        print("Running tests with %d workers" % self.n_processes)

        # create table header
        print("")
        print("Test name".ljust(30) + "Test result".ljust(29) +
              "Test".center(9) + "Total".center(9))
        print("=" * 80)

        if len(self.skipped):
            print("Skipped autotests:")

            # print out any skipped tests
            for result in self.skipped:
                # unpack result tuple
                test_result, result_str, test_name, _, _, _ = result
                self.csvwriter.writerow([test_name, test_result, result_str])

                t = ("%s:" % test_name).ljust(30)
                t += result_str.ljust(29)
                t += "[00m 00s]"

                print(t)

        # make a note of tests start time
        self.start = time.time()

        # whatever happens, try to save as much of the logs as possible
        try:
            if len(self.parallel_tests) > 0:
                print("Parallel autotests:")
                self.__run_test_group(self.parallel_tests, parallel_cmdlines)

            if len(self.non_parallel_tests) > 0:
                print("Non-parallel autotests:")
                self.__run_test_group(self.non_parallel_tests, [self.cmdline])

            # get total run time
            cur_time = time.time()
            total_time = int(cur_time - self.start)

            # print out summary
            print("=" * 80)
            print("Total run time: %02dm %02ds" % (total_time / 60,
                                                   total_time % 60))
            if self.fails != 0:
                print("Number of failed tests: %s" % str(self.fails))

            # write summary to logfile
            self.logfile.write("Summary\n")
            self.logfile.write("Target: ".ljust(15) + "%s\n" % self.target)
            self.logfile.write("Tests: ".ljust(15) + "%i\n" % self.n_tests)
            self.logfile.write("Failed tests: ".ljust(
                15) + "%i\n" % self.fails)
        except:
            print("Exception occurred")
            print(sys.exc_info())
            self.fails = 1

        # dump the logs from all executions into the logfile
        for buf in self.log_buffers:
            self.logfile.write(buf.replace("\r", ""))

        return self.fails
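

# Illustrative usage sketch (not part of this module): roughly how a driver
# script such as autotest.py is expected to use AutotestRunner. The list names
# below are assumptions for illustration only; each test dict must carry the
# "Name", "Command", "Func" and "Report" keys consumed by run_test() and
# __filter_test() above.
#
#     runner = AutotestRunner(cmdline, target, blocklist, allowlist,
#                             n_processes=4)
#     runner.parallel_tests = parallel_test_list[:]
#     runner.non_parallel_tests = non_parallel_test_list[:]
#     sys.exit(runner.run_all_tests())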