1#!/usr/bin/env perl
2# This is a POC (proof of concept or piece of crap, take your pick) for reading the
3# text representation of trace output related to page allocation. It makes an attempt
4# to extract some high-level information on what is going on. The accuracy of the parser
5# may vary considerably
6#
7# Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/tracing/trace_pipe
8# other options
9#   --prepend-parent	Report on the parent proc and PID
10#   --read-procstat	If the trace lacks process info, get it from /proc
11#   --ignore-pid	Aggregate processes of the same name together
12#
13# Copyright (c) IBM Corporation 2009
14# Author: Mel Gorman <mel@csn.ul.ie>
15use strict;
16use Getopt::Long;
17
# Identifiers for raw tracepoint events, intermediate parser state and the
# high-level events derived from them.
#
# NOTE: in the code visible in this file the counters are stored under hash
# subscripts such as {MM_PAGE_ALLOC}. Perl treats a bare identifier inside
# hash braces as a string, so the hash keys are these names rather than the
# numeric values defined here.
use constant {
	# Tracepoint events
	MM_PAGE_ALLOC			=> 1,
	MM_PAGE_FREE			=> 2,
	MM_PAGE_FREE_BATCHED		=> 3,
	MM_PAGE_PCPU_DRAIN		=> 4,
	MM_PAGE_ALLOC_ZONE_LOCKED	=> 5,
	MM_PAGE_ALLOC_EXTFRAG		=> 6,
	EVENT_UNKNOWN			=> 7,

	# Constants used to track state
	STATE_PCPU_PAGES_DRAINED	=> 8,
	STATE_PCPU_PAGES_REFILLED	=> 9,

	# High-level events extrapolated from tracepoints
	HIGH_PCPU_DRAINS		=> 10,
	HIGH_PCPU_REFILLS		=> 11,
	HIGH_EXT_FRAGMENT		=> 12,
	HIGH_EXT_FRAGMENT_SEVERE	=> 13,
	HIGH_EXT_FRAGMENT_MODERATE	=> 14,
	HIGH_EXT_FRAGMENT_CHANGED	=> 15,
};
38
# Event counters: %perprocesspid is keyed by the "process-pid" string taken
# from each trace line, %perprocess by the process name with the pid removed.
my (%perprocesspid, %perprocess);

# Command line switches, filled in by GetOptions()
my ($opt_ignorepid, $opt_read_procstat, $opt_prepend_parent);
44
# SIGINT handling. A single ctrl-c requests a report; a further ctrl-c that
# arrives within two seconds of the previous one requests an exit.
#   $sigint_report   - a report has been requested
#   $sigint_exit     - number of quick follow-up SIGINTs seen
#   $sigint_pending  - a SIGINT still has to be acted upon by the main loop
#   $sigint_received - time of the most recent SIGINT
my ($sigint_report, $sigint_exit, $sigint_pending, $sigint_received) = (0, 0, 0, 0);

sub sigint_handler {
	my $now = time;
	my $quiet_period_elapsed = ($now - 2 > $sigint_received);

	if ($quiet_period_elapsed) {
		print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
		$sigint_report = 1;
	} else {
		# Only announce the exit request the first time around
		print "Second SIGINT received quickly, exiting\n" unless $sigint_exit;
		$sigint_exit++;
	}

	# The user is hammering ctrl-c: give up immediately, without a report
	if ($sigint_exit > 3) {
		print "Many SIGINTs received, exiting now without report\n";
		exit;
	}

	$sigint_pending = 1;
	$sigint_received = $now;
}
$SIG{INT} = "sigint_handler";
71
# Parse command line options. GetOptions() returns false when it meets an
# option it cannot parse; stop with a usage message in that case rather
# than silently producing a report with settings the user did not ask for.
GetOptions(
	'ignore-pid'	 =>	\$opt_ignorepid,
	'read-procstat'	 =>	\$opt_read_procstat,
	'prepend-parent' =>	\$opt_prepend_parent,
) or die("Usage: $0 [--ignore-pid] [--read-procstat] [--prepend-parent] < trace\n");
78
# Fallback pattern for the details of an mm_page_alloc_extfrag event, with
# one capture group per field. It is handed to generate_traceevent_regex()
# as the default.
my $regex_fragdetails_default = q{page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])};

# Dynamically discovered regex, assigned further down once the event
# format has been examined
my $regex_fragdetails;

# Static patterns, kept as plain strings for readability and for use with /o.
#
# Trace event line, capture groups in order:
#   1: process-pid   2: [cpu]   3: timestamp   4: tracepoint   5: details
# NOTE(review): the pattern is unanchored and group 1 only accepts letters,
# digits and '-', so a task name containing other characters is captured
# from its last run of accepted characters only - confirm this is intended.
my $regex_traceevent = q{\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)};

# stat line: captures the text between the parentheses (the process name)
my $regex_statname = q{[-0-9]*\s\((.*)\).*};

# stat line: captures the number after the single-letter field that follows
# the parenthesised name (used as the parent pid)
my $regex_statppid = q{[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*};
90
# Build the regex used to parse the details of a trace event.
#
# Arguments:
#   $event    - event path below /sys/kernel/tracing/events/, for example
#               "kmem/mm_page_alloc_extfrag"
#   $default  - regex to use when the format file cannot be opened or has
#               no usable "print fmt:" line
#   remaining - the field names, in the order they are expected to appear
#
# Returns the regex as a string. The %p, %d and %lu conversions of the
# kernel format are turned into capture groups. A field whose name is not
# the expected one produces a warning and loses its capture group.
# Dies if expected field names are left over once every field was checked.
sub generate_traceevent_regex {
	my $event = shift;
	my $default = shift;
	my $regex;

	# Read the event format if it is available
	if (open(my $format_fh, '<', "/sys/kernel/tracing/events/$event/format")) {
		while (my $line = <$format_fh>) {
			if ($line =~ /^print fmt:\s"(.*)",.*/) {
				$regex = $1;
				$regex =~ s/%p/\([0-9a-f]*\)/g;
				$regex =~ s/%d/\([-0-9]*\)/g;
				$regex =~ s/%lu/\([0-9]*\)/g;
			}
		}
		close($format_fh);
	}

	# No format file, or a format file without a "print fmt:" line
	if (!defined $regex) {
		$regex = $default;
	}

	# Verify fields are in the right order
	foreach my $tuple (split /\s/, $regex) {
		# Consecutive whitespace yields empty tuples; they are not fields
		next if $tuple eq '';

		my ($key) = split(/=/, $tuple);
		my $expected = shift;
		$expected = '' if !defined $expected;

		if ($key ne $expected) {
			print("WARNING: Format not as expected '$key' != '$expected'\n");
			# Drop the capture group of this field only: the match is
			# non-greedy so it stops at the field's own closing
			# parenthesis, and the key is quoted so it is taken literally
			$regex =~ s/\Q$key\E=\((.*?)\)/$key=$1/;
		}
	}

	if (defined shift) {
		die("Fewer fields than expected in format");
	}

	return $regex;
}
# Build the pattern for mm_page_alloc_extfrag details, passing the default
# pattern and the field names in the order the parser relies on
$regex_fragdetails = generate_traceevent_regex(
	"kmem/mm_page_alloc_extfrag",
	$regex_fragdetails_default,
	qw(page pfn
	   alloc_order fallback_order pageblock_order
	   alloc_migratetype fallback_migratetype
	   fragmenting change_ownership));
135
# Read the first line of /proc/<pid>/stat.
#
# Argument: the pid to look up.
# Returns the line as read (trailing newline included), or the placeholder
# "-1 (UNKNOWN_PROCESS_NAME) R 0" when the pid is not a plain number or
# nothing could be read for it.
sub read_statline($) {
	my $pid = $_[0];
	my $statline;

	# Only look up well-formed pids. An empty pid would otherwise turn the
	# path into /proc//stat, which names the system-wide /proc/stat file
	# rather than a per-process one.
	if (defined $pid && $pid =~ /\A[0-9]+\z/ &&
			open(my $stat_fh, '<', "/proc/$pid/stat")) {
		$statline = <$stat_fh>;
		close($stat_fh);
	}

	if (!defined $statline || $statline eq '') {
		$statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
	}

	return $statline;
}
151
# Build a "name-pid" label for a process from its stat line.
#
# Arguments: the pid and the stat line to take the name from.
# Returns "swapper-0" for pid 0 without looking at the stat line, otherwise
# the text captured by $regex_statname followed by "-<pid>".
# Dies if the stat line does not match $regex_statname.
sub guess_process_pid($$) {
	my $pid = $_[0];
	my $statline = $_[1];

	if ($pid == 0) {
		return "swapper-0";
	}

	if ($statline !~ /$regex_statname/o) {
		die("Failed to match stat line for process name :: $statline");
	}
	return "$1-$pid";
}
165
# Describe the parent of a process as a "name-pid" label.
#
# Arguments: the pid of the child and the child's stat line.
# Returns "NOPARENT-0" for pid 0, otherwise the label produced by
# guess_process_pid() for the parent pid found in the stat line.
# Dies if the stat line does not match $regex_statppid.
sub parent_info($$) {
	my ($pid, $statline) = @_;

	return "NOPARENT-0" if $pid == 0;

	$statline =~ /$regex_statppid/o
		or die("Failed to match stat line process ppid:: $statline");

	# Look the parent up through its own stat line
	my $ppid = $1;
	return guess_process_pid($ppid, read_statline($ppid));
}
183
# Read trace events from STDIN and update the counters in %perprocesspid.
#
# Each line matching $regex_traceevent is attributed to its "process-pid"
# label. With --read-procstat or --prepend-parent the label is completed
# from /proc. Lines that do not match are skipped. The loop ends at end of
# input, or after the current event once $sigint_pending is set.
#
# The counters are stored under string keys (a bare identifier inside hash
# braces is a string), so the key names used here must stay in step with
# those read by dump_stats() and aggregate_perprocesspid().
sub process_events {
	# Read each line of the event log
EVENT_PROCESS:
	while (my $traceevent = <STDIN>) {
		if ($traceevent !~ /$regex_traceevent/o) {
			next;
		}

		# Take what is needed from the capture groups straight away, before
		# another match can replace them. Groups 2 (cpu) and 3 (timestamp)
		# are not needed by this script.
		my $process_pid = $1;
		my $tracepoint = $4;
		my $details = $5;

		if ($opt_read_procstat || $opt_prepend_parent) {
			# Only consult /proc when a pid could be split off the label.
			# A failed match leaves $1 and $2 holding the captures of the
			# trace event match, which are not a name and a pid.
			if ($process_pid =~ /(.*)-([0-9]*)$/) {
				my $process = $1;
				my $pid = $2;
				my $statline = read_statline($pid);

				if ($opt_read_procstat && $process eq '') {
					$process_pid = guess_process_pid($pid, $statline);
				}

				if ($opt_prepend_parent) {
					$process_pid = parent_info($pid, $statline) . " :: $process_pid";
				}
			}
		}

		# Perl Switch() sucks majorly
		if ($tracepoint eq "mm_page_alloc") {
			$perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++;
		} elsif ($tracepoint eq "mm_page_free") {
			$perprocesspid{$process_pid}->{MM_PAGE_FREE}++;
		} elsif ($tracepoint eq "mm_page_free_batched") {
			$perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++;
		} elsif ($tracepoint eq "mm_page_pcpu_drain") {
			$perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++;
			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++;
		} elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
			$perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++;
			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++;
		} elsif ($tracepoint eq "mm_page_alloc_extfrag") {
			if ($details !~ /$regex_fragdetails/o) {
				print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
				next;
			}

			$perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++;

			# Groups 4, 8 and 9 of the details pattern
			my $fallback_order = $4;
			my $fragmenting = $8;
			my $change_ownership = $9;

			if ($fragmenting) {
				$perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++;
				# A fallback order of 3 or less is counted as severe
				if ($fallback_order <= 3) {
					$perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++;
				} else {
					$perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++;
				}
			}
			if ($change_ownership) {
				$perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++;
			}
		} else {
			$perprocesspid{$process_pid}->{EVENT_UNKNOWN}++;
		}

		# Catch a full pcpu drain event: a run of drain tracepoints ends
		# with the first event of another kind for the same process
		if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} &&
				$tracepoint ne "mm_page_pcpu_drain") {

			$perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++;
			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
		}

		# Catch a full pcpu refill event, detected the same way
		if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} &&
				$tracepoint ne "mm_page_alloc_zone_locked") {
			$perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++;
			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
		}

		# Hand control back to the caller so the signal can be acted upon
		if ($sigint_pending) {
			last EVENT_PROCESS;
		}
	}
}
297
# Print a table of the collected counters, one row per key of the table.
#
# Argument: a reference to a hash mapping a process label to a hash of
# counters. Counters that are missing are printed as 0.
#
# Before a row is printed, a drain or refill run that is still open
# (STATE_PCPU_PAGES_DRAINED / STATE_PCPU_PAGES_REFILLED) is counted and the
# state is reset. %stats is a shallow copy, so this update is made to the
# caller's per-process hashes.
sub dump_stats {
	my $hashref = shift;
	my %stats = %$hashref;

	# Size the first column for the labels that are actually printed
	my $max_strlen = 0;
	foreach my $process_pid (keys %stats) {
		my $len = length($process_pid);
		if ($len > $max_strlen) {
			$max_strlen = $len;
		}
	}
	$max_strlen += 2;

	my $header_format = "%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n";
	my $row_format = "%-" . $max_strlen . "s %8d %10d   %8d %8d   %8d %8d %8d   %8d %8d %8d %8d %8d %8d\n";

	# The column titles are spread over three header lines
	printf("\n");
	printf($header_format,
		"Process", "Pages",  "Pages",      "Pages", "Pages", "PCPU",  "PCPU",   "PCPU",    "Fragment",  "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
	printf($header_format,
		"details", "allocd", "allocd",     "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing",   "Changed", "Severe", "Moderate", "");
	printf($header_format,
		"",        "",       "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");

	foreach my $process_pid (keys %stats) {
		# Dump final aggregates
		if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) {
			$stats{$process_pid}->{HIGH_PCPU_DRAINS}++;
			$stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
		}
		if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) {
			$stats{$process_pid}->{HIGH_PCPU_REFILLS}++;
			$stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
		}

		printf($row_format,
			$process_pid,
			$stats{$process_pid}->{MM_PAGE_ALLOC},
			$stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED},
			$stats{$process_pid}->{MM_PAGE_FREE},
			$stats{$process_pid}->{MM_PAGE_FREE_BATCHED},
			$stats{$process_pid}->{MM_PAGE_PCPU_DRAIN},
			$stats{$process_pid}->{HIGH_PCPU_DRAINS},
			$stats{$process_pid}->{HIGH_PCPU_REFILLS},
			$stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG},
			$stats{$process_pid}->{HIGH_EXT_FRAG},
			$stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED},
			$stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE},
			$stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE},
			$stats{$process_pid}->{EVENT_UNKNOWN});
	}
}
352
# Rebuild %perprocess by summing the counters of %perprocesspid over all
# labels that share a process name. The name is the label with its
# trailing "-<digits>" removed; an empty name becomes "NO_PROCESS_NAME".
#
# A drain or refill run that is still open for a label is counted as one
# drain or refill in the aggregate, without touching the per-pid state, so
# the totals agree with what dump_stats() reports in per-pid mode.
sub aggregate_perprocesspid() {
	undef %perprocess;

	my @summed_counters = qw(
		MM_PAGE_ALLOC MM_PAGE_ALLOC_ZONE_LOCKED
		MM_PAGE_FREE MM_PAGE_FREE_BATCHED
		MM_PAGE_PCPU_DRAIN HIGH_PCPU_DRAINS HIGH_PCPU_REFILLS
		MM_PAGE_ALLOC_EXTFRAG HIGH_EXT_FRAG
		HIGH_EXT_FRAGMENT_CHANGED HIGH_EXT_FRAGMENT_SEVERE
		HIGH_EXT_FRAGMENT_MODERATE EVENT_UNKNOWN
	);

	foreach my $process_pid (keys %perprocesspid) {
		my $process = $process_pid;
		$process =~ s/-([0-9])*$//;
		if ($process eq '') {
			$process = "NO_PROCESS_NAME";
		}

		my $pidstats = $perprocesspid{$process_pid};
		foreach my $counter (@summed_counters) {
			$perprocess{$process}->{$counter} += $pidstats->{$counter};
		}

		# Runs that have not been closed by a later event yet
		if ($pidstats->{STATE_PCPU_PAGES_DRAINED}) {
			$perprocess{$process}->{HIGH_PCPU_DRAINS}++;
		}
		if ($pidstats->{STATE_PCPU_PAGES_REFILLED}) {
			$perprocess{$process}->{HIGH_PCPU_REFILLS}++;
		}
	}
}
380
# Print the statistics table: aggregated by process name when --ignore-pid
# was given, otherwise one row per process-pid label.
sub report() {
	if ($opt_ignorepid) {
		aggregate_perprocesspid();
		dump_stats(\%perprocess);
	} else {
		dump_stats(\%perprocesspid);
	}
}
389
# Process events or signals until neither is available.
#
# process_events() returns at end of input or when a SIGINT is pending.
# An exit request clears the pending flag. A report request is served once
# at least two seconds have passed since the last SIGINT; until then the
# flag stays set and event processing is resumed.
sub signal_loop() {
	while (1) {
		my $report_emitted = 0;
		process_events();

		# Handle pending signals if any
		if ($sigint_pending) {
			my $now = time;

			if ($sigint_exit) {
				print "Received exit signal\n";
				$sigint_pending = 0;
			}

			if ($sigint_report && $now >= $sigint_received + 2) {
				report();
				$sigint_report = 0;
				$sigint_pending = 0;
				$report_emitted = 1;
			}
		}

		last unless $sigint_pending || $report_emitted;
	}
}

# Run until the loop gives up, then print the final report
signal_loop();
report();
419