xref: /openbmc/linux/scripts/bpf_doc.py (revision a9d85efb)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-only
3#
4# Copyright (C) 2018-2019 Netronome Systems, Inc.
5# Copyright (C) 2021 Isovalent, Inc.
6
7# In case user attempts to run with Python 2.
8from __future__ import print_function
9
10import argparse
11import re
12import sys, os
13
class NoHelperFound(BaseException):
    """
    Raised when the line being examined does not hold a helper function
    prototype; the helper parsing loop uses it as its stop condition.
    """
16
class NoSyscallCommandFound(BaseException):
    """
    Raised when the line being examined does not hold a syscall command
    name; the syscall parsing loop uses it as its stop condition.
    """
19
class ParsingError(BaseException):
    """
    Error reporting a line that could not be parsed.
    @line: text of the offending line
    @reader: (optional) file object being read; when provided, its current
             offset (as returned by tell()) is included in the message
    """
    def __init__(self, line='<line not provided>', reader=None):
        if not reader:
            message = 'Error parsing line: %s' % line
        else:
            message = ('Error at file offset %d, parsing line: %s' %
                       (reader.tell(), line))
        BaseException.__init__(self, message)
28
29
class APIElement(object):
    """
    Container for the documentation of one item of the eBPF API.
    @proto: prototype of the API symbol
    @desc: textual description of the symbol
    @ret: (optional) description of any associated return value
    """
    def __init__(self, proto='', desc='', ret=''):
        # Plain value holder: the three strings are stored untouched.
        self.proto, self.desc, self.ret = proto, desc, ret
41
42
class Helper(APIElement):
    """
    An object representing the description of an eBPF helper function.
    @proto: function prototype of the helper function
    @desc: textual description of the helper function
    @ret: description of the return value of the helper function
    """
    # Patterns are raw strings so that regex escapes (\w, \*, \() are not
    # treated as (invalid) Python string escapes, and they are compiled once
    # for the class rather than on every call.
    #
    # One argument: optional leading words (const, struct, ...), then a type
    # word or a literal "...", optionally followed by " [*...]name".
    _ARG_RE = re.compile(r'((\w+ )*?(\w+|\.\.\.))( (\**)(\w+))?$')
    # Whole prototype: "ret_type [*...]name(arg[, arg ...])", one to five
    # comma-separated arguments.
    _PROTO_RE = re.compile(r'(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')

    def proto_break_down(self):
        """
        Break down helper function prototype into smaller chunks: return
        type, name, distinct arguments.

        Returns a dictionary with keys 'ret_type', 'ret_star', 'name' and
        'args'; 'args' is a list of dictionaries with keys 'type', 'star' and
        'name' ('star' and 'name' are None for an argument made of a type
        only, such as "void" or "...").

        Raises ParsingError if the prototype, or one of its arguments, does
        not have the expected shape.
        """
        capture = self._PROTO_RE.match(self.proto)
        if not capture:
            raise ParsingError(self.proto)

        res = {
            'ret_type': capture.group(1),
            'ret_star': capture.group(2),
            'name': capture.group(3),
            'args': [],
        }

        for a in capture.group(4).split(', '):
            arg = self._ARG_RE.match(a)
            if not arg:
                raise ParsingError(a)
            res['args'].append({
                'type': arg.group(1),
                'star': arg.group(5),
                'name': arg.group(6),
            })

        return res
75
76
class HeaderParser(object):
    """
    An object used to parse a file in order to extract the documentation of
    the eBPF syscall commands and of the eBPF helper functions. The helpers
    that can be retrieved are stored as Helper objects in the self.helpers
    list, the syscall commands as APIElement objects in the self.commands
    list.
    @filename: name of file to parse, usually include/uapi/linux/bpf.h in the
               kernel tree
    """
    # All patterns are raw strings (so regex escapes are not read as Python
    # string escapes) and are compiled once for the class.
    #
    # A comment line holding a syscall command name, and the "NOTES" line
    # that ends the list of commands.
    _SYMBOL_RE = re.compile(r' \* ?(.+)$')
    _NOTES_RE = re.compile(r' \* ?NOTES$')
    # A helper prototype. Argument can be of shape:
    #   - "void"
    #   - "type  name"
    #   - "type *name"
    #   - Same as above, with "const" and/or "struct" in front of type
    #   - "..." (undefined number of arguments, for bpf_trace_printk())
    # There is at least one term ("void"), and at most five arguments.
    _PROTO_RE = re.compile(r' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
    # Subsection titles, and the (further indented) lines of their bodies.
    _DESC_TITLE_RE = re.compile(r' \* ?(?:\t| {5,8})Description$')
    _RET_TITLE_RE = re.compile(r' \* ?(?:\t| {5,8})Return$')
    _SECTION_BODY_RE = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')

    def __init__(self, filename):
        self.reader = open(filename, 'r')
        self.line = ''
        self.helpers = []
        self.commands = []

    def parse_element(self):
        """
        Parse one syscall command (name, description, return value) starting
        at the current line and return it as an APIElement.
        """
        proto    = self.parse_symbol()
        desc     = self.parse_desc()
        ret      = self.parse_ret()
        return APIElement(proto=proto, desc=desc, ret=ret)

    def parse_helper(self):
        """
        Parse one helper (prototype, description, return value) starting at
        the current line and return it as a Helper.
        """
        proto    = self.parse_proto()
        desc     = self.parse_desc()
        ret      = self.parse_ret()
        return Helper(proto=proto, desc=desc, ret=ret)

    def parse_symbol(self):
        """
        Return the command name held by the current line and move on to the
        next line. Raise NoSyscallCommandFound if the current line holds no
        name, or if it is the "NOTES" line ending the list of commands.
        """
        capture = self._SYMBOL_RE.match(self.line)
        if not capture:
            raise NoSyscallCommandFound
        if self._NOTES_RE.match(self.line):
            raise NoSyscallCommandFound
        self.line = self.reader.readline()
        return capture.group(1)

    def parse_proto(self):
        """
        Return the helper prototype held by the current line and move on to
        the next line. Raise NoHelperFound if the current line is not a
        prototype.
        """
        capture = self._PROTO_RE.match(self.line)
        if not capture:
            raise NoHelperFound
        self.line = self.reader.readline()
        return capture.group(1)

    def _parse_section(self, title_re):
        """
        Parse the subsection whose title line matches @title_re and return
        its text, one '\\n'-terminated line per body line. Return '' without
        consuming anything if the current line is not that title.
        """
        if not title_re.match(self.line):
            # The subsection can be missing and we might be looking at
            # another attribute: return but do not consume.
            return ''
        # The body can be several lines, some of them possibly empty, and it
        # stops when a line that is not part of the body is met; that line is
        # left in self.line for the next parsing step.
        text = ''
        while True:
            self.line = self.reader.readline()
            if self.line == ' *\n':
                text += '\n'
                continue
            capture = self._SECTION_BODY_RE.match(self.line)
            if not capture:
                break
            text += capture.group(1) + '\n'
        return text

    def parse_desc(self):
        """
        Return the text of the "Description" subsection starting at the
        current line, or '' if there is none.
        """
        return self._parse_section(self._DESC_TITLE_RE)

    def parse_ret(self):
        """
        Return the text of the "Return" subsection starting at the current
        line, or '' if there is none.
        """
        return self._parse_section(self._RET_TITLE_RE)

    def seek_to(self, target, help_message):
        """
        Position the parser after the first occurrence of @target: the line
        on which @target starts and the line following it are skipped, and
        the next one becomes the current line.
        Raise Exception(@help_message) if @target is not found in the file.
        """
        self.reader.seek(0)
        content = self.reader.read()
        offset = content.find(target)
        if offset == -1:
            raise Exception(help_message)
        # The offset found above is an index into the decoded text, which is
        # not a valid position for seek() on a text file. Rewind instead, and
        # skip the lines located before the one on which the target starts.
        self.reader.seek(0)
        for _ in range(content.count('\n', 0, offset)):
            self.reader.readline()
        self.reader.readline()
        self.reader.readline()
        self.line = self.reader.readline()

    def parse_syscall(self):
        """
        Collect all syscall command descriptions into self.commands.
        """
        self.seek_to('* DOC: eBPF Syscall Commands',
                     'Could not find start of eBPF syscall descriptions list')
        while True:
            try:
                command = self.parse_element()
                self.commands.append(command)
            except NoSyscallCommandFound:
                break

    def parse_helpers(self):
        """
        Collect all helper descriptions into self.helpers.
        """
        self.seek_to('* Start of BPF helper function descriptions:',
                     'Could not find start of eBPF helper descriptions list')
        while True:
            try:
                helper = self.parse_helper()
                self.helpers.append(helper)
            except NoHelperFound:
                break

    def run(self):
        """
        Parse syscall commands, then helpers, and close the file (also when
        parsing fails).
        """
        try:
            self.parse_syscall()
            self.parse_helpers()
        finally:
            self.reader.close()
210
211###############################################################################
212
class Printer(object):
    """
    Base class for printers. A printer holds a list of elements and defines
    how to print them: subclasses fill self.elements and override the
    print_header(), print_one() and print_footer() hooks as needed.
    @parser: A HeaderParser with objects to print to standard output
    """
    def __init__(self, parser):
        self.elements = []
        self.parser = parser

    def print_header(self):
        """Hook called once before any element; prints nothing by default."""

    def print_footer(self):
        """Hook called once after all elements; prints nothing by default."""

    def print_one(self, helper):
        """Hook called for each element; prints nothing by default."""

    def print_all(self):
        """Print the header, each element in list order, then the footer."""
        self.print_header()
        for element in self.elements:
            self.print_one(element)
        self.print_footer()
237
238
class PrinterRST(Printer):
    """
    Common base for the printers producing ReStructured Text. It provides the
    license notice and the formatting of the description and return value of
    an API element; subclasses decide which elements are printed and how.
    @parser: A HeaderParser with objects to print to standard output
    """
    def __init__(self, parser):
        self.parser = parser

    def print_license(self):
        """
        Print the license and "generated file" notice, as RST comments,
        followed by a blank line.
        """
        license_text = '''\
.. Copyright (C) All BPF authors and contributors from 2014 to present.
.. See git log include/uapi/linux/bpf.h in kernel tree for details.
..
.. %%%LICENSE_START(VERBATIM)
.. Permission is granted to make and distribute verbatim copies of this
.. manual provided the copyright notice and this permission notice are
.. preserved on all copies.
..
.. Permission is granted to copy and distribute modified versions of this
.. manual under the conditions for verbatim copying, provided that the
.. entire resulting derived work is distributed under the terms of a
.. permission notice identical to this one.
..
.. Since the Linux kernel and libraries are constantly changing, this
.. manual page may be incorrect or out-of-date.  The author(s) assume no
.. responsibility for errors or omissions, or for damages resulting from
.. the use of the information contained herein.  The author(s) may not
.. have taken the same level of care in the production of this manual,
.. which is licensed free of charge, as they might when working
.. professionally.
..
.. Formatted or processed versions of this manual, if unaccompanied by
.. the source, must acknowledge the copyright and authors of this work.
.. %%%LICENSE_END
..
.. Please do not edit this file. It was generated from the documentation
.. located in file include/uapi/linux/bpf.h of the Linux kernel sources
.. (helpers description), and from scripts/bpf_doc.py in the same
.. repository (header and footer).
'''
        print(license_text)

    def print_elem(self, elem):
        """
        Print the "Description" and "Return" subsections of @elem (each one
        only if not empty), then a blank line. Non-empty lines are indented
        with two tabs, empty lines are printed empty.
        """
        if elem.desc:
            print('\tDescription')
            # Do not strip all newline characters: formatted code at the end
            # of a section must be followed by a blank line.
            description = re.sub('\n$', '', elem.desc, count=1)
            for line in description.split('\n'):
                indent = '\t\t' if line else ''
                print('{}{}'.format(indent, line))

        if elem.ret:
            print('\tReturn')
            retval = elem.ret.rstrip()
            for line in retval.split('\n'):
                indent = '\t\t' if line else ''
                print('{}{}'.format(indent, line))

        print('')
297
298
class PrinterHelpersRST(PrinterRST):
    """
    A printer for dumping collected information about helpers as a ReStructured
    Text page compatible with the rst2man program, which can be used to
    generate a manual page for the helpers.
    @parser: A HeaderParser with Helper objects to print to standard output
    """
    def __init__(self, parser):
        self.parser = parser
        self.elements = parser.helpers

    def print_header(self):
        """
        Print the license notice, then the title and introduction of the
        manual page, up to the "HELPERS" section title.
        """
        header = '''\
===========
BPF-HELPERS
===========
-------------------------------------------------------------------------------
list of eBPF helper functions
-------------------------------------------------------------------------------

:Manual section: 7

DESCRIPTION
===========

The extended Berkeley Packet Filter (eBPF) subsystem consists in programs
written in a pseudo-assembly language, then attached to one of the several
kernel hooks and run in reaction of specific events. This framework differs
from the older, "classic" BPF (or "cBPF") in several aspects, one of them being
the ability to call special functions (or "helpers") from within a program.
These functions are restricted to a white-list of helpers defined in the
kernel.

These helpers are used by eBPF programs to interact with the system, or with
the context in which they work. For instance, they can be used to print
debugging messages, to get the time since the system was booted, to interact
with eBPF maps, or to manipulate network packets. Since there are several eBPF
program types, and that they do not run in the same context, each program type
can only call a subset of those helpers.

Due to eBPF conventions, a helper can not have more than five arguments.

Internally, eBPF programs call directly into the compiled helper functions
without requiring any foreign-function interface. As a result, calling helpers
introduces no overhead, thus offering excellent performance.

This document is an attempt to list and document the helpers available to eBPF
developers. They are sorted by chronological order (the oldest helpers in the
kernel at the top).

HELPERS
=======
'''
        PrinterRST.print_license(self)
        print(header)

    def print_footer(self):
        """
        Print the sections closing the manual page (examples, license,
        implementation notes, see also).
        """
        # Backslashes meant for the RST output ("\ " before a parenthesis)
        # are doubled, so that Python does not read them as string escapes.
        footer = '''
EXAMPLES
========

Example usage for most of the eBPF helpers listed in this manual page are
available within the Linux kernel sources, at the following locations:

* *samples/bpf/*
* *tools/testing/selftests/bpf/*

LICENSE
=======

eBPF programs can have an associated license, passed along with the bytecode
instructions to the kernel when the programs are loaded. The format for that
string is identical to the one in use for kernel modules (Dual licenses, such
as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
programs that are compatible with the GNU General Public License (GNU GPL).

In order to use such helpers, the eBPF program must be loaded with the correct
license string passed (via **attr**) to the **bpf**\\ () system call, and this
generally translates into the C source code of the program containing a line
similar to the following:

::

\tchar ____license[] __attribute__((section("license"), used)) = "GPL";

IMPLEMENTATION
==============

This manual page is an effort to document the existing eBPF helper functions.
But as of this writing, the BPF sub-system is under heavy development. New eBPF
program or map types are added, along with new helper functions. Some helpers
are occasionally made available for additional program types. So in spite of
the efforts of the community, this page might not be up-to-date. If you want to
check by yourself what helper functions exist in your kernel, or what types of
programs they can support, here are some files among the kernel tree that you
may be interested in:

* *include/uapi/linux/bpf.h* is the main BPF header. It contains the full list
  of all helper functions, as well as many other BPF definitions including most
  of the flags, structs or constants used by the helpers.
* *net/core/filter.c* contains the definition of most network-related helper
  functions, and the list of program types from which they can be used.
* *kernel/trace/bpf_trace.c* is the equivalent for most tracing program-related
  helpers.
* *kernel/bpf/verifier.c* contains the functions used to check that valid types
  of eBPF maps are used with a given helper function.
* *kernel/bpf/* directory contains other files in which additional helpers are
  defined (for cgroups, sockmaps, etc.).
* The bpftool utility can be used to probe the availability of helper functions
  on the system (as well as supported program and map types, and a number of
  other parameters). To do so, run **bpftool feature probe** (see
  **bpftool-feature**\\ (8) for details). Add the **unprivileged** keyword to
  list features available to unprivileged users.

Compatibility between helper functions and program types can generally be found
in the files where helper functions are defined. Look for the **struct
bpf_func_proto** objects and for functions returning them: these functions
contain a list of helpers that a given program type can call. Note that the
**default:** label of the **switch ... case** used to filter helpers can call
other functions, themselves allowing access to additional helpers. The
requirement for GPL license is also in those **struct bpf_func_proto**.

Compatibility between helper functions and map types can be found in the
**check_map_func_compatibility**\\ () function in file *kernel/bpf/verifier.c*.

Helper functions that invalidate the checks on **data** and **data_end**
pointers for network processing are listed in function
**bpf_helper_changes_pkt_data**\\ () in file *net/core/filter.c*.

SEE ALSO
========

**bpf**\\ (2),
**bpftool**\\ (8),
**cgroups**\\ (7),
**ip**\\ (8),
**perf_event_open**\\ (2),
**sendmsg**\\ (2),
**socket**\\ (7),
**tc-bpf**\\ (8)'''
        print(footer)

    def print_proto(self, helper):
        """
        Format function prototype with bold and italics markers. This makes RST
        file less readable, but gives nice results in the manual page.
        """
        proto = helper.proto_break_down()

        print('**%s %s%s(' % (proto['ret_type'],
                              proto['ret_star'].replace('*', '\\*'),
                              proto['name']),
              end='')

        comma = ''
        for a in proto['args']:
            one_arg = '{}{}'.format(comma, a['type'])
            if a['name']:
                if a['star']:
                    # Stars are escaped for RST; bold is closed right after
                    # them, with "\ " gluing the italic name that follows.
                    one_arg += ' {}**\\ '.format(a['star'].replace('*', '\\*'))
                else:
                    one_arg += '** '
                one_arg += '*{}*\\ **'.format(a['name'])
            comma = ', '
            print(one_arg, end='')

        print(')**')

    def print_one(self, helper):
        """
        Print the formatted prototype of @helper, followed by its description
        and return value.
        """
        self.print_proto(helper)
        self.print_elem(helper)
469
470
class PrinterSyscallRST(PrinterRST):
    """
    Printer turning the syscall commands collected by the parser into a
    ReStructured Text page compatible with the rst2man program, which can be
    used to generate a manual page for the syscall.
    @parser: A HeaderParser with APIElement objects to print to standard
             output
    """
    def __init__(self, parser):
        self.elements = parser.commands

    def print_header(self):
        """
        Print the license notice, then the title of the manual page up to
        the "COMMANDS" section title.
        """
        PrinterRST.print_license(self)
        title = '''\
===
bpf
===
-------------------------------------------------------------------------------
Perform a command on an extended BPF object
-------------------------------------------------------------------------------

:Manual section: 2

COMMANDS
========
'''
        print(title)

    def print_one(self, command):
        """
        Print the name of @command in bold, followed by its description and
        return value.
        """
        bold_name = '**%s**' % (command.proto)
        print(bold_name)
        self.print_elem(command)
502
503
class PrinterHelpers(Printer):
    """
    A printer for dumping collected information about helpers as C header to
    be included from BPF program.
    @parser: A HeaderParser with Helper objects to print to standard output
    """
    def __init__(self, parser):
        self.parser = parser
        self.elements = parser.helpers
        # Names of the helpers already printed. Kept per printer, so that
        # duplicate detection and helper numbering start afresh for each
        # instance instead of being shared by the whole class.
        self.seen_helpers = set()

    # Types declared (forward declarations) at the top of the header.
    type_fwds = [
            'struct bpf_fib_lookup',
            'struct bpf_sk_lookup',
            'struct bpf_perf_event_data',
            'struct bpf_perf_event_value',
            'struct bpf_pidns_info',
            'struct bpf_redir_neigh',
            'struct bpf_sock',
            'struct bpf_sock_addr',
            'struct bpf_sock_ops',
            'struct bpf_sock_tuple',
            'struct bpf_spin_lock',
            'struct bpf_sysctl',
            'struct bpf_tcp_sock',
            'struct bpf_tunnel_key',
            'struct bpf_xfrm_state',
            'struct linux_binprm',
            'struct pt_regs',
            'struct sk_reuseport_md',
            'struct sockaddr',
            'struct tcphdr',
            'struct seq_file',
            'struct tcp6_sock',
            'struct tcp_sock',
            'struct tcp_timewait_sock',
            'struct tcp_request_sock',
            'struct udp6_sock',
            'struct task_struct',

            'struct __sk_buff',
            'struct sk_msg_md',
            'struct xdp_md',
            'struct path',
            'struct btf_ptr',
            'struct inode',
            'struct socket',
            'struct file',
            'struct bpf_timer',
    ]
    # Types copied as-is from the documentation to the header.
    known_types = {
            '...',
            'void',
            'const void',
            'char',
            'const char',
            'int',
            'long',
            'unsigned long',

            '__be16',
            '__be32',
            '__wsum',

            'struct bpf_fib_lookup',
            'struct bpf_perf_event_data',
            'struct bpf_perf_event_value',
            'struct bpf_pidns_info',
            'struct bpf_redir_neigh',
            'struct bpf_sk_lookup',
            'struct bpf_sock',
            'struct bpf_sock_addr',
            'struct bpf_sock_ops',
            'struct bpf_sock_tuple',
            'struct bpf_spin_lock',
            'struct bpf_sysctl',
            'struct bpf_tcp_sock',
            'struct bpf_tunnel_key',
            'struct bpf_xfrm_state',
            'struct linux_binprm',
            'struct pt_regs',
            'struct sk_reuseport_md',
            'struct sockaddr',
            'struct tcphdr',
            'struct seq_file',
            'struct tcp6_sock',
            'struct tcp_sock',
            'struct tcp_timewait_sock',
            'struct tcp_request_sock',
            'struct udp6_sock',
            'struct task_struct',
            'struct path',
            'struct btf_ptr',
            'struct inode',
            'struct socket',
            'struct file',
            'struct bpf_timer',
    }
    # Types replaced by another one when written to the header.
    mapped_types = {
            'u8': '__u8',
            'u16': '__u16',
            'u32': '__u32',
            'u64': '__u64',
            's8': '__s8',
            's16': '__s16',
            's32': '__s32',
            's64': '__s64',
            'size_t': 'unsigned long',
            'struct bpf_map': 'void',
            'struct sk_buff': 'struct __sk_buff',
            'const struct sk_buff': 'const struct __sk_buff',
            'struct sk_msg_buff': 'struct sk_msg_md',
            'struct xdp_buff': 'struct xdp_md',
    }
    # Helpers overloaded for different context types.
    overloaded_helpers = [
        'bpf_get_socket_cookie',
        'bpf_sk_assign',
    ]

    def print_header(self):
        """
        Print the opening comments of the header, then one forward
        declaration per entry of type_fwds, then a blank line.
        """
        header = '''\
/* This is auto-generated file. See bpf_doc.py for details. */

/* Forward declarations of BPF structs */'''

        print(header)
        for fwd in self.type_fwds:
            print('%s;' % fwd)
        print('')

    def print_footer(self):
        """
        Print the end of the header: a single empty line.
        """
        footer = ''
        print(footer)

    def map_type(self, t):
        """
        Return the type to write to the header for the documented type @t:
        @t itself if it is in known_types, its replacement if it is in
        mapped_types. Any other type is reported on standard error and the
        program exits with status 1.
        """
        if t in self.known_types:
            return t
        if t in self.mapped_types:
            return self.mapped_types[t]
        print("Unrecognized type '%s', please add it to known types!" % t,
              file=sys.stderr)
        sys.exit(1)

    def print_one(self, helper):
        """
        Print @helper as a comment holding its name, description and return
        value, followed by the definition of a static function pointer
        initialized with the helper number. A helper whose name has already
        been printed by this printer is skipped.
        """
        proto = helper.proto_break_down()

        if proto['name'] in self.seen_helpers:
            return
        self.seen_helpers.add(proto['name'])

        print('/*')
        print(" * %s" % proto['name'])
        print(" *")
        if (helper.desc):
            # Do not strip all newline characters: formatted code at the end of
            # a section must be followed by a blank line.
            for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
                print(' *{}{}'.format(' \t' if line else '', line))

        if (helper.ret):
            print(' *')
            print(' * Returns')
            for line in helper.ret.rstrip().split('\n'):
                print(' *{}{}'.format(' \t' if line else '', line))

        print(' */')
        print('static %s %s(*%s)(' % (self.map_type(proto['ret_type']),
                                      proto['ret_star'], proto['name']), end='')
        comma = ''
        for i, a in enumerate(proto['args']):
            t = a['type']
            n = a['name']
            # The first argument of an overloaded helper is its context,
            # whose type differs between program types: make it generic.
            if proto['name'] in self.overloaded_helpers and i == 0:
                t = 'void'
                n = 'ctx'
            one_arg = '{}{}'.format(comma, self.map_type(t))
            if n:
                if a['star']:
                    one_arg += ' {}'.format(a['star'])
                else:
                    one_arg += ' '
                one_arg += '{}'.format(n)
            comma = ', '
            print(one_arg, end='')

        # The helper number is the count of distinct helpers printed so far,
        # this one included.
        print(') = (void *) %d;' % len(self.seen_helpers))
        print('')
692
693###############################################################################
694
# If script is launched from scripts/ from kernel tree and can access
# ../include/uapi/linux/bpf.h, use it as a default name for the file to parse,
# otherwise the --filename argument is required on the command line.
script = os.path.abspath(sys.argv[0])
linuxRoot = os.path.dirname(os.path.dirname(script))
bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')

# RST printer to use for each documentation target.
printers = {
        'helpers': PrinterHelpersRST,
        'syscall': PrinterSyscallRST,
}

argParser = argparse.ArgumentParser(description="""
Parse eBPF header file and generate documentation for the eBPF API.
The RST-formatted output produced can be turned into a manual page with the
rst2man utility.
""")
argParser.add_argument('--header', action='store_true',
                       help='generate C header file')
if (os.path.isfile(bpfh)):
    argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h',
                           default=bpfh)
else:
    # No default file available: have argparse report the missing option
    # rather than failing later on when trying to open None.
    argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h',
                           required=True)
argParser.add_argument('target', nargs='?', default='helpers',
                       choices=printers.keys(), help='eBPF API target')
args = argParser.parse_args()

# Parse file.
headerParser = HeaderParser(args.filename)
headerParser.run()

# Print formatted output to standard output.
if args.header:
    # The C header can only be generated for helpers.
    if args.target != 'helpers':
        raise NotImplementedError('Only helpers header generation is supported')
    printer = PrinterHelpers(headerParser)
else:
    printer = printers[args.target](headerParser)
printer.print_all()
735