#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) Google LLC, 2018
#
# Author: Tom Roeder <tmroeder@google.com>
#
"""A tool for generating compile_commands.json in the Linux kernel."""

import argparse
import json
import logging
import os
import re
import subprocess
import sys

_DEFAULT_OUTPUT = 'compile_commands.json'
_DEFAULT_LOG_LEVEL = 'WARNING'

# Matches the hidden per-object command files, e.g. '.foo.o.cmd'.
_FILENAME_PATTERN = r'^\..*\.cmd$'
# Group 1 is the command up to (and including the space before) the source
# file, group 2 is the .c/.S source file itself.
_LINE_PATTERN = r'^savedcmd_[^ ]*\.o := (.* )([^ ]*\.[cS]) *(;|$)'
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
# The tools/ directory adopts a different build system, and produces .cmd
# files in a different format. Do not support it.
_EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools']


def parse_arguments():
    """Sets up and parses command-line arguments.

    Returns:
        A 5-tuple, in this order:
        log_level: A logging level to filter log output.
        directory: The work directory where the objects were built, as an
            absolute path.
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The list of files/directories to handle to find .cmd files.
            Falls back to the (unnormalized) --directory value when no
            positional path is given.
    """
    usage = 'Creates a compile_commands.json database from kernel .cmd files'
    parser = argparse.ArgumentParser(description=usage)

    directory_help = ('specify the output directory used for the kernel build '
                      '(defaults to the working directory)')
    parser.add_argument('-d', '--directory', type=str, default='.',
                        help=directory_help)

    output_help = ('path to the output command database (defaults to ' +
                   _DEFAULT_OUTPUT + ')')
    parser.add_argument('-o', '--output', type=str, default=_DEFAULT_OUTPUT,
                        help=output_help)

    log_level_help = ('the level of log messages to produce (defaults to ' +
                      _DEFAULT_LOG_LEVEL + ')')
    parser.add_argument('--log_level', choices=_VALID_LOG_LEVELS,
                        default=_DEFAULT_LOG_LEVEL, help=log_level_help)

    ar_help = 'command used for parsing .a archives'
    parser.add_argument('-a', '--ar', type=str, default='llvm-ar', help=ar_help)

    paths_help = ('directories to search or files to parse '
                  '(files should be *.o, *.a, or modules.order). '
                  'If nothing is specified, the current directory is searched')
    parser.add_argument('paths', type=str, nargs='*', help=paths_help)

    args = parser.parse_args()

    return (args.log_level,
            os.path.abspath(args.directory),
            args.output,
            args.ar,
            args.paths or [args.directory])


def cmdfiles_in_dir(directory):
    """Generate the iterator of .cmd files found under the directory.

    Walk under the given directory, and yield every .cmd file found.
    Only the _EXCLUDE_DIRS entries located directly under 'directory' are
    skipped; a directory of the same name deeper in the tree is still
    searched.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """

    filename_matcher = re.compile(_FILENAME_PATTERN)
    exclude_dirs = {os.path.join(directory, d) for d in _EXCLUDE_DIRS}

    for dirpath, dirnames, filenames in os.walk(directory, topdown=True):
        # Prune unwanted directories. Emptying dirnames in place stops
        # os.walk() from descending any further below this directory.
        if dirpath in exclude_dirs:
            dirnames[:] = []
            continue

        for filename in filenames:
            if filename_matcher.match(filename):
                yield os.path.join(dirpath, filename)


def to_cmdfile(path):
    """Return the path of .cmd file used for the given build artifact

    Args:
        path: file path

    Returns:
        The path to .cmd file
    """
    dirname, base = os.path.split(path)
    return os.path.join(dirname, '.' + base + '.cmd')


def cmdfiles_for_a(archive, ar):
    """Generate the iterator of .cmd files associated with the archive.

    Parse the given archive, and yield every .cmd file used to build it.

    Args:
        archive: The archive to parse
        ar: The command used to list the archive members (run as
            'ar -t archive')

    Yields:
        The path to every .cmd file found

    Raises:
        subprocess.CalledProcessError: The ar command exited with a non-zero
            status.
    """
    # The listing is split on any whitespace, so member names are assumed
    # not to contain spaces.
    for obj in subprocess.check_output([ar, '-t', archive]).decode().split():
        yield to_cmdfile(obj)


def cmdfiles_for_modorder(modorder):
    """Generate the iterator of .cmd files associated with the modules.order.

    Parse the given modules.order, and yield every .cmd file used to build the
    contained modules. Exits the program if an entry does not end with '.o'.

    Args:
        modorder: The modules.order file to parse

    Yields:
        The path to every .cmd file found
    """
    with open(modorder) as f:
        for line in f:
            obj = line.rstrip()
            base, ext = os.path.splitext(obj)
            if ext != '.o':
                sys.exit('{}: module path must end with .o'.format(obj))
            mod = base + '.mod'
            # Read from *.mod, to get a list of objects that compose the module.
            with open(mod) as m:
                for mod_line in m:
                    yield to_cmdfile(mod_line.rstrip())


def process_line(root_directory, command_prefix, file_path):
    """Extracts information from a .cmd line and creates an entry from it.

    Args:
        root_directory: The directory that was searched for .cmd files. Usually
            used directly in the "directory" entry in compile_commands.json.
        command_prefix: The extracted command line, up to the last element.
        file_path: The .c file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        An entry to append to compile_commands.

    Raises:
        ValueError: Could not find the extracted file based on file_path and
            root_directory.
    """
    # The .cmd files are intended to be included directly by Make, so they
    # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
    # kernel version). The compile_commands.json file is not interpreted
    # by Make, so this code replaces the escaped version with '#'.
    prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#')

    # Use os.path.abspath() to normalize the path resolving '.' and '..' .
    abs_path = os.path.abspath(os.path.join(root_directory, file_path))
    if not os.path.exists(abs_path):
        raise ValueError('File %s not found' % abs_path)
    return {
        'directory': root_directory,
        'file': abs_path,
        'command': prefix + file_path,
    }


def main():
    """Walks through the directory and finds and parses .cmd files."""
    log_level, directory, output, ar, paths = parse_arguments()

    level = getattr(logging, log_level)
    logging.basicConfig(format='%(levelname)s: %(message)s', level=level)

    line_matcher = re.compile(_LINE_PATTERN)

    compile_commands = []

    for path in paths:
        # If 'path' is a directory, handle all .cmd files under it.
        # Otherwise, handle .cmd files associated with the file.
        # built-in objects are linked via vmlinux.a
        # Modules are listed in modules.order.
        # NOTE(review): the 'paths' help text also mentions *.o files, but no
        # branch below handles them; they end up as 'unknown file type'.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line of a .cmd file is inspected.
            with open(cmdfile, 'rt') as f:
                result = line_matcher.match(f.readline())
            if not result:
                continue

            try:
                entry = process_line(directory, result.group(1),
                                     result.group(2))
            except ValueError as err:
                # A missing source file is not fatal; skip the entry.
                logging.info('Could not add line from %s: %s',
                             cmdfile, err)
            else:
                compile_commands.append(entry)

    with open(output, 'wt') as f:
        json.dump(compile_commands, f, indent=2, sort_keys=True)


if __name__ == '__main__':
    main()