#!/usr/bin/env drgn
#
# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.

# Help text shown by argparse (RawTextHelpFormatter keeps the layout as-is).
desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CPUtime  Total CPU time consumed by the workqueue in seconds. This is
           sampled from scheduler ticks and only provides ballpark
           measurement. "nohz_full=" CPUs are excluded from measurement.

  CPUitsv  The number of times a concurrency-managed work item hogged CPU
           longer than the threshold (workqueue.cpu_intensive_thresh_us)
           and got excluded from concurrency management to avoid stalling
           other work items.

  CMwake   The number of concurrency-management wake-ups while executing a
           work item of the workqueue.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""

import sys
import signal
import os
import re
import time
import json
import argparse

import drgn
from drgn.helpers.linux.list import list_for_each_entry,list_empty
from drgn.helpers.linux.cpumask import for_each_possible_cpu

parser = argparse.ArgumentParser(description=desc,
                                 formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('workqueue', metavar='REGEX', nargs='*',
                    help='Target workqueue name patterns (all if empty)')
parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
                    help='Monitoring interval (0 to print once and exit)')
parser.add_argument('-j', '--json', action='store_true',
                    help='Output in json')
args = parser.parse_args()

def err(s):
    """Print message `s` to stderr and exit with status 1."""
    print(s, file=sys.stderr, flush=True)
    sys.exit(1)

# `prog` is not defined in this file; it is expected to be provided by the
# drgn runtime that executes the script (see the shebang line).
workqueues              = prog['workqueues']

WQ_UNBOUND              = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM          = prog['WQ_MEM_RECLAIM']

PWQ_STAT_STARTED        = prog['PWQ_STAT_STARTED']      # work items started execution
PWQ_STAT_COMPLETED      = prog['PWQ_STAT_COMPLETED']    # work items completed execution
PWQ_STAT_CPU_TIME       = prog['PWQ_STAT_CPU_TIME']     # total CPU time consumed
PWQ_STAT_CPU_INTENSIVE  = prog['PWQ_STAT_CPU_INTENSIVE'] # wq_cpu_intensive_thresh_us violations
PWQ_STAT_CM_WAKEUP      = prog['PWQ_STAT_CM_WAKEUP']    # concurrency-management worker wakeups
PWQ_STAT_MAYDAY         = prog['PWQ_STAT_MAYDAY']       # maydays to rescuer
PWQ_STAT_RESCUED        = prog['PWQ_STAT_RESCUED']      # linked work items executed by rescuer
PWQ_NR_STATS            = prog['PWQ_NR_STATS']

class WqStats:
    """Snapshot of one workqueue's counters, summed over all of its pwqs."""

    def __init__(self, wq):
        """Read name, flags and the per-pwq stats of workqueue object `wq`."""
        self.name = wq.name.string_().decode()
        # In Python `&` binds tighter than `!=`, so these test the flag bit.
        self.unbound = wq.flags & WQ_UNBOUND != 0
        self.mem_reclaim = wq.flags & WQ_MEM_RECLAIM != 0
        self.stats = [0] * PWQ_NR_STATS
        # Accumulate every counter across the pool_workqueues linked on
        # wq.pwqs, converting to plain Python ints as we go.
        for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the snapshot as a plain dict; `now` is stored as 'timestamp'."""
        return { 'timestamp'            : now,
                 'name'                 : self.name,
                 'unbound'              : self.unbound,
                 'mem_reclaim'          : self.mem_reclaim,
                 'started'              : self.stats[PWQ_STAT_STARTED],
                 'completed'            : self.stats[PWQ_STAT_COMPLETED],
                 'cpu_time'             : self.stats[PWQ_STAT_CPU_TIME],
                 'cpu_intensive'        : self.stats[PWQ_STAT_CPU_INTENSIVE],
                 'cm_wakeup'            : self.stats[PWQ_STAT_CM_WAKEUP],
                 'mayday'               : self.stats[PWQ_STAT_MAYDAY],
                 'rescued'              : self.stats[PWQ_STAT_RESCUED], }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str()'s layout."""
        return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\
            f'{"CPUitsv":>7} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return this workqueue's stats formatted as one table row."""
        # Columns that are only reported for some workqueues show '-' otherwise.
        cpu_intensive = '-'
        cm_wakeup = '-'
        mayday = '-'
        rescued = '-'

        # CPU-intensive and CM wake-up counts are shown only for workqueues
        # without WQ_UNBOUND set.
        if not self.unbound:
            cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
            cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP])

        # Rescuer-related counts are shown only when WQ_MEM_RECLAIM is set.
        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])

        # Name is truncated to its last 24 characters. In-flight is
        # started - completed, clamped so it never displays as negative.
        # CPU time is divided by 1000000 for display (the help text says the
        # column is in seconds, so the raw counter is presumably microseconds).
        out = f'{self.name[-24:]:24} ' \
              f'{self.stats[PWQ_STAT_STARTED]:8} ' \
              f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
              f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} ' \
              f'{cpu_intensive:>7} ' \
              f'{cm_wakeup:>7} ' \
              f'{mayday:>7} ' \
              f'{rescued:>7} '
        # NOTE(review): the row always ends with a space, so stripping ':'
        # currently removes nothing; kept to preserve the exact output.
        return out.rstrip(':')

# Set by the SIGINT handler to ask the monitoring loop to stop.
exit_req = False

def sigint_handler(signr, frame):
    """SIGINT handler: request loop termination instead of raising."""
    global exit_req
    exit_req = True

def main():
    """Print workqueue stats once per interval until SIGINT (or once if 0)."""
    # handle args
    table_fmt = not args.json
    interval = args.interval

    # Multiple patterns are OR-ed together into a single regex; no patterns
    # means every workqueue is shown.
    filter_re = re.compile('|'.join(args.workqueue)) if args.workqueue else None

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Emit real JSON (one object per line); printing the dict
                # directly would produce a Python repr that JSON parsers
                # reject (single quotes, True/False).
                print(json.dumps(stats.dict(now)))

        if interval == 0:
            break
        time.sleep(interval)

if __name__ == "__main__":
    main()