#!/usr/bin/env drgn
#
# Copyright (C) 2023 Tejun Heo <[email protected]>
# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.

desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CPUitsv  The number of times a concurrency-managed work item hogged CPU
           longer than the threshold (workqueue.cpu_intensive_thresh_us)
           and got excluded from concurrency management to avoid stalling
           other work items.

  CMwake   The number of concurrency-management wake-ups while executing a
           work item of the workqueue.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""

import sys
import signal
import os
import re
import time
import json

import drgn
from drgn.helpers.linux.list import list_for_each_entry,list_empty
from drgn.helpers.linux.cpumask import for_each_possible_cpu

import argparse
parser = argparse.ArgumentParser(description=desc,
                                 formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('workqueue', metavar='REGEX', nargs='*',
                    help='Target workqueue name patterns (all if empty)')
parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
                    help='Monitoring interval (0 to print once and exit)')
parser.add_argument('-j', '--json', action='store_true',
                    help='Output in json')
args = parser.parse_args()

def err(s):
    """Print the message `s` to stderr and exit with status 1."""
    print(s, file=sys.stderr, flush=True)
    sys.exit(1)

# `prog` is not defined in this file; it is expected to be supplied by the
# drgn runtime that executes the script (see the shebang).
workqueues              = prog['workqueues']

WQ_UNBOUND              = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM          = prog['WQ_MEM_RECLAIM']

PWQ_STAT_STARTED        = prog['PWQ_STAT_STARTED']      # work items started execution
PWQ_STAT_COMPLETED      = prog['PWQ_STAT_COMPLETED']    # work items completed execution
PWQ_STAT_CPU_INTENSIVE  = prog['PWQ_STAT_CPU_INTENSIVE'] # wq_cpu_intensive_thresh_us violations
PWQ_STAT_CM_WAKEUP      = prog['PWQ_STAT_CM_WAKEUP']    # concurrency-management worker wakeups
PWQ_STAT_MAYDAY         = prog['PWQ_STAT_MAYDAY']       # maydays to rescuer
PWQ_STAT_RESCUED        = prog['PWQ_STAT_RESCUED']      # linked work items executed by rescuer
PWQ_NR_STATS            = prog['PWQ_NR_STATS']

class WqStats:
    """Snapshot of one workqueue's counters, summed over its pool_workqueues."""

    def __init__(self, wq):
        """Read the name, flags and per-pwq stats of workqueue object `wq`."""
        self.name = wq.name.string_().decode()
        # bool() keeps these plain Python booleans so they serialize as JSON
        # true/false in the --json output.
        self.unbound = bool(wq.flags & WQ_UNBOUND != 0)
        self.mem_reclaim = bool(wq.flags & WQ_MEM_RECLAIM != 0)
        self.stats = [0] * PWQ_NR_STATS
        for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the snapshot as a dict of plain values, stamped with `now`."""
        return { 'timestamp'            : now,
                 'name'                 : self.name,
                 'unbound'              : self.unbound,
                 'mem_reclaim'          : self.mem_reclaim,
                 'started'              : self.stats[PWQ_STAT_STARTED],
                 'completed'            : self.stats[PWQ_STAT_COMPLETED],
                 'cpu_intensive'        : self.stats[PWQ_STAT_CPU_INTENSIVE],
                 'cm_wakeup'            : self.stats[PWQ_STAT_CM_WAKEUP],
                 'mayday'               : self.stats[PWQ_STAT_MAYDAY],
                 'rescued'              : self.stats[PWQ_STAT_RESCUED], }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str()."""
        return f'{"":>24} {"total":>8} {"infl":>5} '\
            f'{"CPUitsv":>7} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return this workqueue's counters as one fixed-width table row.

        Columns that are only filled in for some workqueues show '-'
        otherwise: CPUitsv/CMwake are printed for non-unbound workqueues,
        mayday/rescued for workqueues with WQ_MEM_RECLAIM set.
        """
        cpu_intensive = '-'
        cm_wakeup = '-'
        mayday = '-'
        rescued = '-'

        if not self.unbound:
            cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
            cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP])

        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])

        # Names longer than 24 characters keep their last 24. In-flight is
        # started - completed, clamped at 0.
        return f'{self.name[-24:]:24} ' \
               f'{self.stats[PWQ_STAT_STARTED]:8} ' \
               f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
               f'{cpu_intensive:>7} ' \
               f'{cm_wakeup:>7} ' \
               f'{mayday:>7} ' \
               f'{rescued:>7} '

exit_req = False

def sigint_handler(signr, frame):
    """SIGINT handler: ask the monitoring loop in main() to stop."""
    global exit_req
    exit_req = True

def main():
    """Print workqueue stats every `--interval` seconds until SIGINT.

    Output is a table by default, or one JSON object per workqueue per line
    with --json. An interval of 0 prints a single sample and exits.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval
    if interval < 0:
        # time.sleep() rejects negative values; fail up front with a message.
        err('interval must be non-negative')

    # All patterns are OR-ed into a single regex; no patterns means no filter.
    filter_re = None
    if args.workqueue:
        try:
            filter_re = re.compile('|'.join(args.workqueue))
        except re.error as e:
            err(f'invalid workqueue pattern: {e}')

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Emit real JSON rather than the Python dict repr.
                print(json.dumps(stats.dict(now)))

        # Flush once per sample so piped consumers see each interval promptly.
        sys.stdout.flush()

        if interval == 0:
            break
        time.sleep(interval)

if __name__ == "__main__":
    main()