1#!/usr/bin/env perl 2# This is a POC (proof of concept or piece of crap, take your pick) for reading the 3# text representation of trace output related to page allocation. It makes an attempt 4# to extract some high-level information on what is going on. The accuracy of the parser 5# may vary considerably 6# 7# Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe 8# other options 9# --prepend-parent Report on the parent proc and PID 10# --read-procstat If the trace lacks process info, get it from /proc 11# --ignore-pid Aggregate processes of the same name together 12# 13# Copyright (c) IBM Corporation 2009 14# Author: Mel Gorman <mel@csn.ul.ie> 15use strict; 16use Getopt::Long; 17 18# Tracepoint events 19use constant MM_PAGE_ALLOC => 1; 20use constant MM_PAGE_FREE => 2; 21use constant MM_PAGE_FREE_BATCHED => 3; 22use constant MM_PAGE_PCPU_DRAIN => 4; 23use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5; 24use constant MM_PAGE_ALLOC_EXTFRAG => 6; 25use constant EVENT_UNKNOWN => 7; 26 27# Constants used to track state 28use constant STATE_PCPU_PAGES_DRAINED => 8; 29use constant STATE_PCPU_PAGES_REFILLED => 9; 30 31# High-level events extrapolated from tracepoints 32use constant HIGH_PCPU_DRAINS => 10; 33use constant HIGH_PCPU_REFILLS => 11; 34use constant HIGH_EXT_FRAGMENT => 12; 35use constant HIGH_EXT_FRAGMENT_SEVERE => 13; 36use constant HIGH_EXT_FRAGMENT_MODERATE => 14; 37use constant HIGH_EXT_FRAGMENT_CHANGED => 15; 38 39my %perprocesspid; 40my %perprocess; 41my $opt_ignorepid; 42my $opt_read_procstat; 43my $opt_prepend_parent; 44 45# Catch sigint and exit on request 46my $sigint_report = 0; 47my $sigint_exit = 0; 48my $sigint_pending = 0; 49my $sigint_received = 0; 50sub sigint_handler { 51 my $current_time = time; 52 if ($current_time - 2 > $sigint_received) { 53 print "SIGINT received, report pending. Hit ctrl-c again to exit\n"; 54 $sigint_report = 1; 55 } else { 56 if (!$sigint_exit) { 57 print "Second SIGINT received quickly, exiting\n"; 58 } 59 $sigint_exit++; 60 } 61 62 if ($sigint_exit > 3) { 63 print "Many SIGINTs received, exiting now without report\n"; 64 exit; 65 } 66 67 $sigint_received = $current_time; 68 $sigint_pending = 1; 69} 70$SIG{INT} = "sigint_handler"; 71 72# Parse command line options 73GetOptions( 74 'ignore-pid' => \$opt_ignorepid, 75 'read-procstat' => \$opt_read_procstat, 76 'prepend-parent' => \$opt_prepend_parent, 77); 78 79# Defaults for dynamically discovered regex's 80my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])'; 81 82# Dyanically discovered regex 83my $regex_fragdetails; 84 85# Static regex used. Specified like this for readability and for use with /o 86# (process_pid) (cpus ) ( time ) (tpoint ) (details) 87my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)'; 88my $regex_statname = '[-0-9]*\s\((.*)\).*'; 89my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*'; 90 91sub generate_traceevent_regex { 92 my $event = shift; 93 my $default = shift; 94 my $regex; 95 96 # Read the event format or use the default 97 if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) { 98 $regex = $default; 99 } else { 100 my $line; 101 while (!eof(FORMAT)) { 102 $line = <FORMAT>; 103 if ($line =~ /^print fmt:\s"(.*)",.*/) { 104 $regex = $1; 105 $regex =~ s/%p/\([0-9a-f]*\)/g; 106 $regex =~ s/%d/\([-0-9]*\)/g; 107 $regex =~ s/%lu/\([0-9]*\)/g; 108 } 109 } 110 } 111 112 # Verify fields are in the right order 113 my $tuple; 114 foreach $tuple (split /\s/, $regex) { 115 my ($key, $value) = split(/=/, $tuple); 116 my $expected = shift; 117 if ($key ne $expected) { 118 print("WARNING: Format not as expected '$key' != '$expected'"); 119 $regex =~ s/$key=\((.*)\)/$key=$1/; 120 } 121 } 122 123 if (defined shift) { 124 die("Fewer fields than expected in format"); 125 } 126 127 return $regex; 128} 129$regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag", 130 $regex_fragdetails_default, 131 "page", "pfn", 132 "alloc_order", "fallback_order", "pageblock_order", 133 "alloc_migratetype", "fallback_migratetype", 134 "fragmenting", "change_ownership"); 135 136sub read_statline($) { 137 my $pid = $_[0]; 138 my $statline; 139 140 if (open(STAT, "/proc/$pid/stat")) { 141 $statline = <STAT>; 142 close(STAT); 143 } 144 145 if ($statline eq '') { 146 $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0"; 147 } 148 149 return $statline; 150} 151 152sub guess_process_pid($$) { 153 my $pid = $_[0]; 154 my $statline = $_[1]; 155 156 if ($pid == 0) { 157 return "swapper-0"; 158 } 159 160 if ($statline !~ /$regex_statname/o) { 161 die("Failed to math stat line for process name :: $statline"); 162 } 163 return "$1-$pid"; 164} 165 166sub parent_info($$) { 167 my $pid = $_[0]; 168 my $statline = $_[1]; 169 my $ppid; 170 171 if ($pid == 0) { 172 return "NOPARENT-0"; 173 } 174 175 if ($statline !~ /$regex_statppid/o) { 176 die("Failed to match stat line process ppid:: $statline"); 177 } 178 179 # Read the ppid stat line 180 $ppid = $1; 181 return guess_process_pid($ppid, read_statline($ppid)); 182} 183 184sub process_events { 185 my $traceevent; 186 my $process_pid; 187 my $cpus; 188 my $timestamp; 189 my $tracepoint; 190 my $details; 191 my $statline; 192 193 # Read each line of the event log 194EVENT_PROCESS: 195 while ($traceevent = <STDIN>) { 196 if ($traceevent =~ /$regex_traceevent/o) { 197 $process_pid = $1; 198 $tracepoint = $4; 199 200 if ($opt_read_procstat || $opt_prepend_parent) { 201 $process_pid =~ /(.*)-([0-9]*)$/; 202 my $process = $1; 203 my $pid = $2; 204 205 $statline = read_statline($pid); 206 207 if ($opt_read_procstat && $process eq '') { 208 $process_pid = guess_process_pid($pid, $statline); 209 } 210 211 if ($opt_prepend_parent) { 212 $process_pid = parent_info($pid, $statline) . " :: $process_pid"; 213 } 214 } 215 216 # Unnecessary in this script. Uncomment if required 217 # $cpus = $2; 218 # $timestamp = $3; 219 } else { 220 next; 221 } 222 223 # Perl Switch() sucks majorly 224 if ($tracepoint eq "mm_page_alloc") { 225 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++; 226 } elsif ($tracepoint eq "mm_page_free") { 227 $perprocesspid{$process_pid}->{MM_PAGE_FREE}++ 228 } elsif ($tracepoint eq "mm_page_free_batched") { 229 $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++; 230 } elsif ($tracepoint eq "mm_page_pcpu_drain") { 231 $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++; 232 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++; 233 } elsif ($tracepoint eq "mm_page_alloc_zone_locked") { 234 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++; 235 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++; 236 } elsif ($tracepoint eq "mm_page_alloc_extfrag") { 237 238 # Extract the details of the event now 239 $details = $5; 240 241 my ($page, $pfn); 242 my ($alloc_order, $fallback_order, $pageblock_order); 243 my ($alloc_migratetype, $fallback_migratetype); 244 my ($fragmenting, $change_ownership); 245 246 if ($details !~ /$regex_fragdetails/o) { 247 print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n"; 248 next; 249 } 250 251 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++; 252 $page = $1; 253 $pfn = $2; 254 $alloc_order = $3; 255 $fallback_order = $4; 256 $pageblock_order = $5; 257 $alloc_migratetype = $6; 258 $fallback_migratetype = $7; 259 $fragmenting = $8; 260 $change_ownership = $9; 261 262 if ($fragmenting) { 263 $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++; 264 if ($fallback_order <= 3) { 265 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++; 266 } else { 267 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++; 268 } 269 } 270 if ($change_ownership) { 271 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++; 272 } 273 } else { 274 $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++; 275 } 276 277 # Catch a full pcpu drain event 278 if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} && 279 $tracepoint ne "mm_page_pcpu_drain") { 280 281 $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++; 282 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0; 283 } 284 285 # Catch a full pcpu refill event 286 if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} && 287 $tracepoint ne "mm_page_alloc_zone_locked") { 288 $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++; 289 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0; 290 } 291 292 if ($sigint_pending) { 293 last EVENT_PROCESS; 294 } 295 } 296} 297 298sub dump_stats { 299 my $hashref = shift; 300 my %stats = %$hashref; 301 302 # Dump per-process stats 303 my $process_pid; 304 my $max_strlen = 0; 305 306 # Get the maximum process name 307 foreach $process_pid (keys %perprocesspid) { 308 my $len = length($process_pid); 309 if ($len > $max_strlen) { 310 $max_strlen = $len; 311 } 312 } 313 $max_strlen += 2; 314 315 printf("\n"); 316 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n", 317 "Process", "Pages", "Pages", "Pages", "Pages", "PCPU", "PCPU", "PCPU", "Fragment", "Fragment", "MigType", "Fragment", "Fragment", "Unknown"); 318 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n", 319 "details", "allocd", "allocd", "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing", "Changed", "Severe", "Moderate", ""); 320 321 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n", 322 "", "", "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", ""); 323 324 foreach $process_pid (keys %stats) { 325 # Dump final aggregates 326 if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) { 327 $stats{$process_pid}->{HIGH_PCPU_DRAINS}++; 328 $stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0; 329 } 330 if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) { 331 $stats{$process_pid}->{HIGH_PCPU_REFILLS}++; 332 $stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0; 333 } 334 335 printf("%-" . $max_strlen . "s %8d %10d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d\n", 336 $process_pid, 337 $stats{$process_pid}->{MM_PAGE_ALLOC}, 338 $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}, 339 $stats{$process_pid}->{MM_PAGE_FREE}, 340 $stats{$process_pid}->{MM_PAGE_FREE_BATCHED}, 341 $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN}, 342 $stats{$process_pid}->{HIGH_PCPU_DRAINS}, 343 $stats{$process_pid}->{HIGH_PCPU_REFILLS}, 344 $stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}, 345 $stats{$process_pid}->{HIGH_EXT_FRAG}, 346 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}, 347 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}, 348 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}, 349 $stats{$process_pid}->{EVENT_UNKNOWN}); 350 } 351} 352 353sub aggregate_perprocesspid() { 354 my $process_pid; 355 my $process; 356 undef %perprocess; 357 358 foreach $process_pid (keys %perprocesspid) { 359 $process = $process_pid; 360 $process =~ s/-([0-9])*$//; 361 if ($process eq '') { 362 $process = "NO_PROCESS_NAME"; 363 } 364 365 $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}; 366 $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}; 367 $perprocess{$process}->{MM_PAGE_FREE} += $perprocesspid{$process_pid}->{MM_PAGE_FREE}; 368 $perprocess{$process}->{MM_PAGE_FREE_BATCHED} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}; 369 $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}; 370 $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}; 371 $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}; 372 $perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}; 373 $perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}; 374 $perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}; 375 $perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}; 376 $perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}; 377 $perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN}; 378 } 379} 380 381sub report() { 382 if (!$opt_ignorepid) { 383 dump_stats(\%perprocesspid); 384 } else { 385 aggregate_perprocesspid(); 386 dump_stats(\%perprocess); 387 } 388} 389 390# Process events or signals until neither is available 391sub signal_loop() { 392 my $sigint_processed; 393 do { 394 $sigint_processed = 0; 395 process_events(); 396 397 # Handle pending signals if any 398 if ($sigint_pending) { 399 my $current_time = time; 400 401 if ($sigint_exit) { 402 print "Received exit signal\n"; 403 $sigint_pending = 0; 404 } 405 if ($sigint_report) { 406 if ($current_time >= $sigint_received + 2) { 407 report(); 408 $sigint_report = 0; 409 $sigint_pending = 0; 410 $sigint_processed = 1; 411 } 412 } 413 } 414 } while ($sigint_pending || $sigint_processed); 415} 416 417signal_loop(); 418report(); 419