1074075aeSMasahiro Yamada#!/usr/bin/env python3
26ad7cbc0SNathan Huckleberry# SPDX-License-Identifier: GPL-2.0
36ad7cbc0SNathan Huckleberry#
46ad7cbc0SNathan Huckleberry# Copyright (C) Google LLC, 2018
56ad7cbc0SNathan Huckleberry#
66ad7cbc0SNathan Huckleberry# Author: Tom Roeder <tmroeder@google.com>
76ad7cbc0SNathan Huckleberry#
86ad7cbc0SNathan Huckleberry"""A tool for generating compile_commands.json in the Linux kernel."""
96ad7cbc0SNathan Huckleberry
106ad7cbc0SNathan Huckleberryimport argparse
116ad7cbc0SNathan Huckleberryimport json
126ad7cbc0SNathan Huckleberryimport logging
136ad7cbc0SNathan Huckleberryimport os
146ad7cbc0SNathan Huckleberryimport re
156ad7cbc0SNathan Huckleberryimport subprocess
16ec783c7cSKortanimport sys
176ad7cbc0SNathan Huckleberry
# Defaults for the --output and --log_level command-line options.
_DEFAULT_OUTPUT = 'compile_commands.json'
_DEFAULT_LOG_LEVEL = 'WARNING'

# File names that start with '.' and end with '.cmd'.
_FILENAME_PATTERN = r'^\..*\.cmd$'
# Matches a line of the form 'savedcmd_<name>.o := <command> <source>'.
# Group 1 is the command up to and including the space before the source
# file; group 2 is the last token, which must end in '.c' or '.S'. The
# source may be followed only by spaces and then ';' or the end of the line.
_LINE_PATTERN = r'^savedcmd_[^ ]*\.o := (.* )([^ ]*\.[cS]) *(;|$)'
# Accepted values for --log_level; each is looked up as an attribute of the
# logging module.
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
# The tools/ directory adopts a different build system, and produces .cmd
# files in a different format. Do not support it.
_EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools']
276ad7cbc0SNathan Huckleberry
def parse_arguments():
    """Build the argument parser and parse the process command line.

    Returns:
        A 5-tuple (log_level, directory, output, ar, paths):
        log_level: Name of the logging level used to filter log output.
        directory: Absolute path of the directory where the objects were
            built.
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The files/directories to handle to find .cmd files. When
            none are given, a one-element list holding the directory
            exactly as passed on the command line.
    """
    parser = argparse.ArgumentParser(
        description='Creates a compile_commands.json database from kernel '
                    '.cmd files')

    parser.add_argument(
        '-d', '--directory', type=str, default='.',
        help='specify the output directory used for the kernel build '
             '(defaults to the working directory)')

    parser.add_argument(
        '-o', '--output', type=str, default=_DEFAULT_OUTPUT,
        help='path to the output command database (defaults to {})'.format(
            _DEFAULT_OUTPUT))

    parser.add_argument(
        '--log_level', choices=_VALID_LOG_LEVELS, default=_DEFAULT_LOG_LEVEL,
        help='the level of log messages to produce (defaults to {})'.format(
            _DEFAULT_LOG_LEVEL))

    parser.add_argument(
        '-a', '--ar', type=str, default='llvm-ar',
        help='command used for parsing .a archives')

    parser.add_argument(
        'paths', type=str, nargs='*',
        help='directories to search or files to parse '
             '(files should be *.o, *.a, or modules.order). '
             'If nothing is specified, the current directory is searched')

    parsed = parser.parse_args()

    # An empty positional list means "search the build directory itself".
    search_paths = parsed.paths or [parsed.directory]
    build_dir = os.path.abspath(parsed.directory)

    return (parsed.log_level, build_dir, parsed.output, parsed.ar,
            search_paths)
716ad7cbc0SNathan Huckleberry
726ad7cbc0SNathan Huckleberry
def cmdfiles_in_dir(directory):
    """Yield the path of every .cmd file found below a directory.

    The tree is walked top-down. The directories named in _EXCLUDE_DIRS,
    taken relative to the given directory, are skipped together with
    everything beneath them.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """
    is_cmd_name = re.compile(_FILENAME_PATTERN).match
    skipped = [os.path.join(directory, name) for name in _EXCLUDE_DIRS]

    for current, subdirs, files in os.walk(directory, topdown=True):
        if current in skipped:
            # Emptying the list in place stops os.walk from descending.
            del subdirs[:]
            continue

        for name in files:
            if not is_cmd_name(name):
                continue
            yield os.path.join(current, name)
976ad7cbc0SNathan Huckleberry
986ad7cbc0SNathan Huckleberry
def to_cmdfile(path):
    """Map a build artifact path to the path of its .cmd file.

    The .cmd file lives in the same directory as the artifact and is named
    '.<artifact name>.cmd'.

    Args:
        path: Path of the build artifact.

    Returns:
        The path to the .cmd file.
    """
    parent, name = os.path.split(path)
    cmd_name = '.{}.cmd'.format(name)
    return os.path.join(parent, cmd_name)
1106ad7cbc0SNathan Huckleberry
1116ad7cbc0SNathan Huckleberry
def cmdfiles_for_a(archive, ar):
    """Yield the .cmd file path for every member of an archive.

    The member list is obtained by running '<ar> -t <archive>' and
    splitting its output on whitespace.

    Args:
        archive: The archive to parse.
        ar: Command used for listing the archive members.

    Yields:
        The path to the .cmd file of each listed member.
    """
    listing = subprocess.check_output([ar, '-t', archive])
    members = listing.decode().split()
    for member in members:
        yield to_cmdfile(member)
1256ad7cbc0SNathan Huckleberry
1266ad7cbc0SNathan Huckleberry
def cmdfiles_for_modorder(modorder):
    """Yield the .cmd file paths for the objects of every listed module.

    Each line of the modules.order file names a module object ending in
    '.o'. The matching '.mod' file is read to get the objects that compose
    the module, and the .cmd path of each of those objects is yielded.

    Args:
        modorder: The modules.order file to parse.

    Yields:
        The path to every .cmd file found.
    """
    with open(modorder) as order_file:
        for entry in order_file:
            module_obj = entry.rstrip()
            stem, suffix = os.path.splitext(module_obj)
            if suffix != '.o':
                sys.exit('{}: module path must end with .o'.format(module_obj))

            # The .mod file lists one composing object per line.
            with open(stem + '.mod') as mod_file:
                for member in mod_file:
                    yield to_cmdfile(member.rstrip())
1506ad7cbc0SNathan Huckleberry
1516ad7cbc0SNathan Huckleberry
def process_line(root_directory, command_prefix, file_path):
    """Build a compile_commands entry from the pieces of a .cmd line.

    Args:
        root_directory: The directory stored in the "directory" field of
            the entry; relative file paths are resolved against it.
        command_prefix: The extracted command line, up to the last element.
        file_path: The source file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        A dict with the keys 'directory', 'file' and 'command'.

    Raises:
        ValueError: The file resolved from root_directory and file_path
            does not exist.
    """
    # os.path.abspath() also normalizes the path, resolving '.' and '..'.
    source = os.path.abspath(os.path.join(root_directory, file_path))
    if not os.path.exists(source):
        raise ValueError('File %s not found' % source)

    # The .cmd files are meant to be included directly by Make, so they
    # escape the pound sign '#' as either '\#' or '$(pound)' (depending on
    # the kernel version). compile_commands.json is not interpreted by
    # Make, so put the plain '#' back.
    unescaped = command_prefix
    for escaped in (r'\#', '$(pound)'):
        unescaped = unescaped.replace(escaped, '#')

    return {
        'directory': root_directory,
        'file': source,
        'command': unescaped + file_path,
    }
1846ad7cbc0SNathan Huckleberry
1856ad7cbc0SNathan Huckleberry
def main():
    """Collect compile commands from .cmd files and write them as JSON.

    Each path from the command line is expanded into a series of .cmd
    files. The first line of every .cmd file is matched against
    _LINE_PATTERN, and each match becomes one entry in the output file.
    """
    log_level, directory, output, ar, paths = parse_arguments()

    logging.basicConfig(format='%(levelname)s: %(message)s',
                        level=getattr(logging, log_level))

    first_line_re = re.compile(_LINE_PATTERN)
    entries = []

    for path in paths:
        # A directory is searched for all .cmd files under it. Otherwise
        # the .cmd files associated with the given file are handled:
        # built-in objects are linked via vmlinux.a, and modules are
        # listed in modules.order.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line of a .cmd file is examined.
            with open(cmdfile, 'rt') as f:
                matched = first_line_re.match(f.readline())
            if not matched:
                continue

            try:
                entries.append(process_line(directory, matched.group(1),
                                            matched.group(2)))
            except ValueError as err:
                # A source file that does not exist is skipped, not fatal.
                logging.info('Could not add line from %s: %s',
                             cmdfile, err)

    with open(output, 'wt') as f:
        json.dump(entries, f, indent=2, sort_keys=True)
2256ad7cbc0SNathan Huckleberry
2266ad7cbc0SNathan Huckleberry
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
229