xref: /linux-6.15/tools/workqueue/wq_monitor.py (revision 616db877)
1#!/usr/bin/env drgn
2#
# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
4# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
5
# Help text passed to argparse as the program description. It is rendered
# verbatim (RawTextHelpFormatter) and explains each column printed by the
# table output.
desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CPUitsv  The number of times a concurrency-managed work item hogged CPU
           longer than the threshold (workqueue.cpu_intensive_thresh_us)
           and got excluded from concurrency management to avoid stalling
           other work items.

  CMwake   The number of concurrency-management wake-ups while executing a
           work item of the workqueue.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""
27
28import sys
29import signal
30import os
31import re
32import time
33import json
34
35import drgn
36from drgn.helpers.linux.list import list_for_each_entry,list_empty
37from drgn.helpers.linux.cpumask import for_each_possible_cpu
38
39import argparse
# Command-line interface: optional workqueue name regexes, the sampling
# interval and the output format. Parsed once at import time into `args`.
parser = argparse.ArgumentParser(
    description=desc,
    formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
    'workqueue',
    metavar='REGEX',
    nargs='*',
    help='Target workqueue name patterns (all if empty)',
)
parser.add_argument(
    '-i', '--interval',
    metavar='SECS',
    type=float,
    default=1,
    help='Monitoring interval (0 to print once and exit)',
)
parser.add_argument(
    '-j', '--json',
    action='store_true',
    help='Output in json',
)
args = parser.parse_args()
49
def err(s):
    """Write *s* to stderr (flushed immediately) and exit with status 1."""
    print(s, file=sys.stderr, flush=True)
    # Same effect as sys.exit(1): unwinds via SystemExit with code 1.
    raise SystemExit(1)
53
# NOTE(review): `prog` is not defined in this file; presumably it is the
# program object supplied by the drgn runtime named in the shebang - confirm.

# List head walked by main() to visit every struct workqueue_struct.
workqueues = prog['workqueues']

# Workqueue flag bits tested against wq.flags in WqStats.
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']

# Indices into pool_workqueue.stats[].
# work items started execution
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED']
# work items completed execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED']
# wq_cpu_intensive_thresh_us violations
PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE']
# concurrency-management worker wakeups
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP']
# maydays to rescuer
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY']
# linked work items executed by rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED']
# number of entries in pool_workqueue.stats[]
PWQ_NR_STATS = prog['PWQ_NR_STATS']
66
class WqStats:
    """Counters of a single workqueue, summed over all of its pool_workqueues.

    Attributes:
        name:        workqueue name decoded from ``wq.name``.
        unbound:     True when ``WQ_UNBOUND`` is set in ``wq.flags``.
        mem_reclaim: True when ``WQ_MEM_RECLAIM`` is set in ``wq.flags``.
        stats:       list of ``PWQ_NR_STATS`` ints indexed by ``PWQ_STAT_*``.
    """

    def __init__(self, wq):
        """Take a snapshot of *wq*, a drgn object for a workqueue_struct."""
        self.name = wq.name.string_().decode()
        # bool() keeps these plain Python values so dict() is JSON-safe.
        self.unbound = bool((wq.flags & WQ_UNBOUND) != 0)
        self.mem_reclaim = bool((wq.flags & WQ_MEM_RECLAIM) != 0)
        self.stats = [0] * PWQ_NR_STATS
        # Each pool_workqueue on wq.pwqs has its own stats[]; add them up.
        for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the snapshot as a flat dict, stamped with *now*.

        *now* is stored unchanged under ``'timestamp'``.
        """
        return { 'timestamp'            : now,
                 'name'                 : self.name,
                 'unbound'              : self.unbound,
                 'mem_reclaim'          : self.mem_reclaim,
                 'started'              : self.stats[PWQ_STAT_STARTED],
                 'completed'            : self.stats[PWQ_STAT_COMPLETED],
                 'cpu_intensive'        : self.stats[PWQ_STAT_CPU_INTENSIVE],
                 'cm_wakeup'            : self.stats[PWQ_STAT_CM_WAKEUP],
                 'mayday'               : self.stats[PWQ_STAT_MAYDAY],
                 'rescued'              : self.stats[PWQ_STAT_RESCUED], }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str().

        Declared static so it can be called on the class or on an instance;
        without the decorator an instance call would raise TypeError.
        """
        return f'{"":>24} {"total":>8} {"infl":>5} '\
            f'{"CPUitsv":>7} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return one table line for this workqueue, without trailing blanks.

        Columns that do not apply are shown as '-': the CPU-intensive and
        CM-wakeup counters for unbound workqueues, and the mayday/rescued
        counters for workqueues without WQ_MEM_RECLAIM.
        """
        stats = self.stats
        na = '-'

        cpu_intensive = na if self.unbound else str(stats[PWQ_STAT_CPU_INTENSIVE])
        cm_wakeup = na if self.unbound else str(stats[PWQ_STAT_CM_WAKEUP])
        mayday = str(stats[PWQ_STAT_MAYDAY]) if self.mem_reclaim else na
        rescued = str(stats[PWQ_STAT_RESCUED]) if self.mem_reclaim else na

        started = stats[PWQ_STAT_STARTED]
        # Clamp at zero so a completed count ahead of started never shows
        # a negative in-flight number.
        in_flight = max(started - stats[PWQ_STAT_COMPLETED], 0)

        # Only the last 24 characters of long names are shown. The original
        # appended a trailing blank and called rstrip(':'), which stripped
        # nothing; the row is now built without the trailing blank.
        return (f'{self.name[-24:]:24} '
                f'{started:8} '
                f'{in_flight:5} '
                f'{cpu_intensive:>7} '
                f'{cm_wakeup:>7} '
                f'{mayday:>7} '
                f'{rescued:>7}')
115
# Set to True by sigint_handler(); main() tests it at the top of every
# loop iteration to decide whether to keep sampling.
exit_req = False


def sigint_handler(signr, frame):
    """Signal handler that only records the exit request.

    Both arguments are ignored; the handler flips the module-level
    ``exit_req`` flag and returns.
    """
    global exit_req
    exit_req = True
121
def main():
    """Periodically print the stats of every matching workqueue.

    Reads the parsed command line from ``args``: the positional patterns
    select workqueues by name (any pattern may match; all workqueues when
    none is given), ``--interval`` sets the delay between samples (0 prints
    a single sample) and ``--json`` switches from the table to one JSON
    object per workqueue per line. After SIGINT the loop stops at its next
    check of ``exit_req``.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval

    # OR all patterns together so one search covers every one of them.
    re_str = '|'.join(args.workqueue)
    filter_re = re.compile(re_str) if re_str else None

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Serialize properly: printing the dict itself yields Python
                # repr (single quotes, True/False), which is not valid JSON.
                print(json.dumps(stats.dict(now)))

        # Flush once per sample so a consumer on a pipe sees each interval
        # instead of waiting for the stdout buffer to fill.
        sys.stdout.flush()

        if interval == 0:
            break
        time.sleep(interval)


if __name__ == "__main__":
    main()
162