1#  This file is part of pybootchartgui.
2
3#  pybootchartgui is free software: you can redistribute it and/or modify
4#  it under the terms of the GNU General Public License as published by
5#  the Free Software Foundation, either version 3 of the License, or
6#  (at your option) any later version.
7
8#  pybootchartgui is distributed in the hope that it will be useful,
9#  but WITHOUT ANY WARRANTY; without even the implied warranty of
10#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11#  GNU General Public License for more details.
12
13#  You should have received a copy of the GNU General Public License
14#  along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>.
15
16class ProcessTree:
17    """ProcessTree encapsulates a process tree.  The tree is built from log files
18       retrieved during the boot process.  When building the process tree, it is
19       pruned and merged in order to be able to visualize it in a comprehensible
20       manner.
21
22       The following pruning techniques are used:
23
24        * idle processes that keep running during the last process sample
25          (which is a heuristic for a background processes) are removed,
26        * short-lived processes (i.e. processes that only live for the
27          duration of two samples or less) are removed,
28        * the processes used by the boot logger are removed,
29        * exploders (i.e. processes that are known to spawn huge meaningless
30          process subtrees) have their subtrees merged together,
31        * siblings (i.e. processes with the same command line living
32          concurrently -- thread heuristic) are merged together,
33        * process runs (unary trees with processes sharing the command line)
34          are merged together.
35
36    """
37    LOGGER_PROC = 'bootchart-colle'
38    EXPLODER_PROCESSES = set(['hwup'])
39
40    def __init__(self, writer, kernel, psstats, sample_period,
41                 monitoredApp, prune, idle, taskstats,
42                 accurate_parentage, for_testing = False):
43        self.writer = writer
44        self.process_tree = []
45        self.taskstats = taskstats
46        if psstats is None:
47            process_list = kernel
48        elif kernel is None:
49            process_list = psstats.process_map.values()
50        else:
51            process_list = list(kernel) + list(psstats.process_map.values())
52        self.process_list = sorted(process_list, key = lambda p: p.pid)
53        self.sample_period = sample_period
54
55        self.build()
56        if not accurate_parentage:
57            self.update_ppids_for_daemons(self.process_list)
58
59        self.start_time = self.get_start_time(self.process_tree)
60        self.end_time = self.get_end_time(self.process_tree)
61        self.duration = self.end_time - self.start_time
62        self.idle = idle
63
64        if for_testing:
65            return
66
67        removed = self.merge_logger(self.process_tree, self.LOGGER_PROC, monitoredApp, False)
68        writer.status("merged %i logger processes" % removed)
69
70        if prune:
71            p_processes = self.prune(self.process_tree, None)
72            p_exploders = self.merge_exploders(self.process_tree, self.EXPLODER_PROCESSES)
73            p_threads = self.merge_siblings(self.process_tree)
74            p_runs = self.merge_runs(self.process_tree)
75            writer.status("pruned %i process, %i exploders, %i threads, and %i runs" % (p_processes, p_exploders, p_threads, p_runs))
76
77        self.sort(self.process_tree)
78
79        self.start_time = self.get_start_time(self.process_tree)
80        self.end_time = self.get_end_time(self.process_tree)
81        self.duration = self.end_time - self.start_time
82
83        self.num_proc = self.num_nodes(self.process_tree)
84
85    def build(self):
86        """Build the process tree from the list of top samples."""
87        self.process_tree = []
88        for proc in self.process_list:
89            if not proc.parent:
90                self.process_tree.append(proc)
91            else:
92                proc.parent.child_list.append(proc)
93
94    def sort(self, process_subtree):
95        """Sort process tree."""
96        for p in process_subtree:
97            p.child_list.sort(key = lambda p: p.pid)
98            self.sort(p.child_list)
99
100    def num_nodes(self, process_list):
101        "Counts the number of nodes in the specified process tree."""
102        nodes = 0
103        for proc in process_list:
104            nodes = nodes + self.num_nodes(proc.child_list)
105        return nodes + len(process_list)
106
107    def get_start_time(self, process_subtree):
108        """Returns the start time of the process subtree.  This is the start
109           time of the earliest process.
110
111        """
112        if not process_subtree:
113            return 100000000
114        return min( [min(proc.start_time, self.get_start_time(proc.child_list)) for proc in process_subtree] )
115
116    def get_end_time(self, process_subtree):
117        """Returns the end time of the process subtree.  This is the end time
118           of the last collected sample.
119
120        """
121        if not process_subtree:
122            return -100000000
123        return max( [max(proc.start_time + proc.duration, self.get_end_time(proc.child_list)) for proc in process_subtree] )
124
125    def get_max_pid(self, process_subtree):
126        """Returns the max PID found in the process tree."""
127        if not process_subtree:
128            return -100000000
129        return max( [max(proc.pid, self.get_max_pid(proc.child_list)) for proc in process_subtree] )
130
131    def update_ppids_for_daemons(self, process_list):
132        """Fedora hack: when loading the system services from rc, runuser(1)
133           is used.  This sets the PPID of all daemons to 1, skewing
134           the process tree.  Try to detect this and set the PPID of
135           these processes the PID of rc.
136
137        """
138        rcstartpid = -1
139        rcendpid = -1
140        rcproc = None
141        for p in process_list:
142            if p.cmd == "rc" and p.ppid // 1000 == 1:
143                rcproc = p
144                rcstartpid = p.pid
145                rcendpid = self.get_max_pid(p.child_list)
146        if rcstartpid != -1 and rcendpid != -1:
147            for p in process_list:
148                if p.pid > rcstartpid and p.pid < rcendpid and p.ppid // 1000 == 1:
149                    p.ppid = rcstartpid
150                    p.parent = rcproc
151            for p in process_list:
152                p.child_list = []
153            self.build()
154
155    def prune(self, process_subtree, parent):
156        """Prunes the process tree by removing idle processes and processes
157           that only live for the duration of a single top sample.  Sibling
158           processes with the same command line (i.e. threads) are merged
159           together. This filters out sleepy background processes, short-lived
160           processes and bootcharts' analysis tools.
161        """
162        def is_idle_background_process_without_children(p):
163            process_end = p.start_time + p.duration
164            return not p.active and \
165                   process_end >= self.start_time + self.duration and \
166                   p.start_time > self.start_time and \
167                   p.duration > 0.9 * self.duration and \
168                   self.num_nodes(p.child_list) == 0
169
170        num_removed = 0
171        idx = 0
172        while idx < len(process_subtree):
173            p = process_subtree[idx]
174            if parent != None or len(p.child_list) == 0:
175
176                prune = False
177                if is_idle_background_process_without_children(p):
178                    prune = True
179                elif p.duration <= 2 * self.sample_period:
180                    # short-lived process
181                    prune = True
182
183                if prune:
184                    process_subtree.pop(idx)
185                    for c in p.child_list:
186                        process_subtree.insert(idx, c)
187                    num_removed += 1
188                    continue
189                else:
190                    num_removed += self.prune(p.child_list, p)
191            else:
192                num_removed += self.prune(p.child_list, p)
193            idx += 1
194
195        return num_removed
196
197    def merge_logger(self, process_subtree, logger_proc, monitored_app, app_tree):
198        """Merges the logger's process subtree.  The logger will typically
199           spawn lots of sleep and cat processes, thus polluting the
200           process tree.
201
202        """
203        num_removed = 0
204        for p in process_subtree:
205            is_app_tree = app_tree
206            if logger_proc == p.cmd and not app_tree:
207                is_app_tree = True
208                num_removed += self.merge_logger(p.child_list, logger_proc, monitored_app, is_app_tree)
209                # don't remove the logger itself
210                continue
211
212            if app_tree and monitored_app != None and monitored_app == p.cmd:
213                is_app_tree = False
214
215            if is_app_tree:
216                for child in p.child_list:
217                    self.merge_processes(p, child)
218                    num_removed += 1
219                p.child_list = []
220            else:
221                num_removed += self.merge_logger(p.child_list, logger_proc, monitored_app, is_app_tree)
222        return num_removed
223
224    def merge_exploders(self, process_subtree, processes):
225        """Merges specific process subtrees (used for processes which usually
226           spawn huge meaningless process trees).
227
228        """
229        num_removed = 0
230        for p in process_subtree:
231            if processes in processes and len(p.child_list) > 0:
232                subtreemap = self.getProcessMap(p.child_list)
233                for child in subtreemap.values():
234                    self.merge_processes(p, child)
235                    num_removed += len(subtreemap)
236                    p.child_list = []
237                    p.cmd += " (+)"
238            else:
239                num_removed += self.merge_exploders(p.child_list, processes)
240        return num_removed
241
242    def merge_siblings(self, process_subtree):
243        """Merges thread processes.  Sibling processes with the same command
244           line are merged together.
245
246        """
247        num_removed = 0
248        idx = 0
249        while idx < len(process_subtree)-1:
250            p = process_subtree[idx]
251            nextp = process_subtree[idx+1]
252            if nextp.cmd == p.cmd:
253                process_subtree.pop(idx+1)
254                idx -= 1
255                num_removed += 1
256                p.child_list.extend(nextp.child_list)
257                self.merge_processes(p, nextp)
258            num_removed += self.merge_siblings(p.child_list)
259            idx += 1
260        if len(process_subtree) > 0:
261            p = process_subtree[-1]
262            num_removed += self.merge_siblings(p.child_list)
263        return num_removed
264
265    def merge_runs(self, process_subtree):
266        """Merges process runs.  Single child processes which share the same
267           command line with the parent are merged.
268
269        """
270        num_removed = 0
271        idx = 0
272        while idx < len(process_subtree):
273            p = process_subtree[idx]
274            if len(p.child_list) == 1 and p.child_list[0].cmd == p.cmd:
275                child = p.child_list[0]
276                p.child_list = list(child.child_list)
277                self.merge_processes(p, child)
278                num_removed += 1
279                continue
280            num_removed += self.merge_runs(p.child_list)
281            idx += 1
282        return num_removed
283
284    def merge_processes(self, p1, p2):
285        """Merges two process' samples."""
286        p1.samples.extend(p2.samples)
287        p1.samples.sort( key = lambda p: p.time )
288        p1time = p1.start_time
289        p2time = p2.start_time
290        p1.start_time = min(p1time, p2time)
291        pendtime = max(p1time + p1.duration, p2time + p2.duration)
292        p1.duration = pendtime - p1.start_time
293