#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only

"""Find Kconfig symbols that are referenced but not defined."""

# (c) 2014-2017 Valentin Rothberg <valentinrothberg@gmail.com>
# (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
#


import argparse
import difflib
import os
import re
import signal
import subprocess
import sys
from multiprocessing import Pool, cpu_count


# regex expressions
OPERATORS = r"&|\(|\)|\||\!"
SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
DEF = r"^\s*(?:menu){,1}config\s+(" + SYMBOL + r")\s*"
EXPR = r"(?:" + OPERATORS + r"|\s|" + SYMBOL + r")+"
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
STMT = r"^\s*(?:if|select|imply|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
SOURCE_SYMBOL = r"(?:\W|\b)+[D]{,1}CONFIG_(" + SYMBOL + r")"

# regex objects
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
REGEX_SYMBOL = re.compile(r'(?!\B)' + SYMBOL + r'(?!\B)')
REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
REGEX_KCONFIG_DEF = re.compile(DEF)
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
REGEX_QUOTES = re.compile("(\"(.*?)\")")

# Whether yel()/red() emit ANSI colors.  main() overwrites this; the default
# keeps the helpers usable when the module is imported rather than executed.
COLOR = False


def parse_options():
    """The user interface of this module.

    Parse and validate the command line and return the argparse namespace.
    Exits with a message on conflicting options, a malformed --diff range, a
    dirty Git tree (unless --force is given), a --commit starting with 'HEAD',
    or an invalid --ignore regex.
    """
    usage = "Run this tool to detect Kconfig symbols that are referenced but " \
            "not defined in Kconfig. If no option is specified, " \
            "checkkconfigsymbols defaults to check your current tree. " \
            "Please note that specifying commits will 'git reset --hard\' " \
            "your current tree! You may save uncommitted changes to avoid " \
            "losing data."

    parser = argparse.ArgumentParser(description=usage)

    parser.add_argument('-c', '--commit', dest='commit', action='store',
                        default="",
                        help="check if the specified commit (hash) introduces "
                             "undefined Kconfig symbols")

    parser.add_argument('-d', '--diff', dest='diff', action='store',
                        default="",
                        help="diff undefined symbols between two commits "
                             "(e.g., -d commmit1..commit2)")

    parser.add_argument('-f', '--find', dest='find', action='store_true',
                        default=False,
                        help="find and show commits that may cause symbols to be "
                             "missing (required to run with --diff)")

    parser.add_argument('-i', '--ignore', dest='ignore', action='store',
                        default="",
                        help="ignore files matching this Python regex "
                             "(e.g., -i '.*defconfig')")

    parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
                        help="print a list of max. 10 string-similar symbols")

    parser.add_argument('--force', dest='force', action='store_true',
                        default=False,
                        help="reset current Git tree even when it's dirty")

    parser.add_argument('--no-color', dest='color', action='store_false',
                        default=True,
                        help="don't print colored output (default when not "
                             "outputting to a terminal)")

    args = parser.parse_args()

    if args.commit and args.diff:
        sys.exit("Please specify only one option at once.")

    if args.diff and not re.match(r"^[\w\-\.\^]+\.\.[\w\-\.\^]+$", args.diff):
        sys.exit("Please specify valid input in the following format: "
                 "\'commit1..commit2\'")

    if args.commit or args.diff:
        if not args.force and tree_is_dirty():
            sys.exit("The current Git tree is dirty (see 'git status'). "
                     "Running this script may\ndelete important data since it "
                     "calls 'git reset --hard' for some performance\nreasons. "
                     " Please run this script in a clean Git tree or pass "
                     "'--force' if you\nwant to ignore this warning and "
                     "continue.")

    if args.commit:
        if args.commit.startswith('HEAD'):
            sys.exit("The --commit option can't use the HEAD ref")

        # --find is only meaningful together with --diff
        args.find = False

    if args.ignore:
        try:
            re.match(args.ignore, "this/is/just/a/test.c")
        except re.error:
            # only a regex compilation problem means "invalid pattern";
            # anything else (e.g. KeyboardInterrupt) must propagate
            sys.exit("Please specify a valid Python regex.")

    return args


def main():
    """Main function of this module.

    Depending on the options, either print symbols similar to --sim, or
    compute the undefined symbols (for the current tree, for one commit, or
    for the difference between two commits) and print them together with the
    referencing files, similar symbols and, with --find, the commits that
    touch each symbol.
    """
    args = parse_options()

    global COLOR
    COLOR = args.color and sys.stdout.isatty()

    if args.sim and not args.commit and not args.diff:
        sims = find_sims(args.sim, args.ignore)
        if sims:
            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
        else:
            print("%s: no similar symbols found" % yel("Similar symbols"))
        sys.exit(0)

    # dictionary of (un)defined symbols
    defined = {}
    undefined = {}

    if args.commit or args.diff:
        head = get_head()

        # get commit range
        if args.commit:
            commit_a = args.commit + "~"
            commit_b = args.commit
        else:
            split = args.diff.split("..")
            commit_a = split[0]
            commit_b = split[1]

        # get undefined items before the commit
        reset(commit_a)
        try:
            undefined_a, _ = check_symbols(args.ignore)

            # get undefined items for the commit
            reset(commit_b)
            undefined_b, defined = check_symbols(args.ignore)
        finally:
            # The tree has been moved away from the user's HEAD at this point;
            # always move it back, even if the analysis was interrupted.
            reset(head)

        # report cases that are present for the commit but not before
        for symbol in sorted(undefined_b):
            if symbol not in undefined_a:
                # symbol has not been undefined before
                undefined[symbol] = sorted(undefined_b.get(symbol))
            else:
                # check if there are new files that reference the undefined
                # symbol
                files = sorted(undefined_b.get(symbol) -
                               undefined_a.get(symbol))
                if files:
                    undefined[symbol] = files

    # default to check the entire tree
    else:
        undefined, defined = check_symbols(args.ignore)

    # now print the output
    for symbol in sorted(undefined):
        print(red(symbol))

        files = sorted(undefined.get(symbol))
        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))

        sims = find_sims(symbol, args.ignore, defined)
        sims_out = yel("Similar symbols")
        if sims:
            print("%s: %s" % (sims_out, ', '.join(sims)))
        else:
            print("%s: %s" % (sims_out, "no similar symbols found"))

        if args.find:
            print("%s:" % yel("Commits changing symbol"))
            commits = find_commits(symbol, args.diff)
            if commits:
                for commit in commits:
                    commit = commit.split(" ", 1)
                    print("\t- %s (\"%s\")" % (yel(commit[0]), commit[1]))
            else:
                print("\t- no commit found")
        print()  # new line


def reset(commit):
    """Reset current git tree to %commit (via 'git reset --hard')."""
    execute(["git", "reset", "--hard", commit])


def yel(string):
    """
    Color %string yellow (returned unchanged when COLOR is false).
    """
    return "\033[33m%s\033[0m" % string if COLOR else string


def red(string):
    """
    Color %string red (returned unchanged when COLOR is false).
    """
    return "\033[31m%s\033[0m" % string if COLOR else string


def execute(cmd):
    """Execute %cmd and return stdout. Exit in case of error.

    %cmd is an argument list run without a shell; stderr is merged into the
    returned text, which is decoded with undecodable bytes replaced.
    """
    try:
        stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                         shell=False)
    except subprocess.CalledProcessError as fail:
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under 'python -S')
        sys.exit(fail)
    return stdout.decode(errors='replace')


def find_commits(symbol, diff):
    """Find commits changing %symbol in the given range of %diff.

    Returns the non-empty lines of 'git log --pretty=oneline
    --abbrev-commit -G %symbol %diff'.
    """
    commits = execute(["git", "log", "--pretty=oneline",
                       "--abbrev-commit", "-G",
                       symbol, diff])
    return [x for x in commits.split("\n") if x]


def tree_is_dirty():
    """Return true if the current working tree is dirty (i.e., if any file has
    been added, deleted, modified, renamed or copied but not committed)."""
    stdout = execute(["git", "status", "--porcelain"])
    # Inspect the two-character status field of every *line*.  Iterating the
    # string itself would walk single characters and flag the tree as dirty
    # as soon as any path contains one of the status letters.
    for line in stdout.splitlines():
        if re.search(r"[URMADC]", line[:2]):
            return True
    return False


def get_head():
    """Return commit hash of current HEAD."""
    stdout = execute(["git", "rev-parse", "HEAD"])
    return stdout.strip('\n')


def partition(lst, size):
    """Partition list @lst into @size interleaved sub-lists.

    Element i of @lst ends up in sub-list i % @size, so the sub-lists differ
    in length by at most one.
    """
    return [lst[i::size] for i in range(size)]


def init_worker():
    """Set signal handler to ignore SIGINT."""
    signal.signal(signal.SIGINT, signal.SIG_IGN)


def find_sims(symbol, ignore, defined=None):
    """Return a list of max. ten Kconfig symbols that are string-similar to
    @symbol.

    @defined is an optional iterable of known symbols.  When it is empty or
    omitted, the defined symbols are collected by parsing every Kconfig file
    of the current git tree (@ignore is forwarded to the parser).  The
    caller's @defined is never modified.
    """
    if defined:
        return difflib.get_close_matches(symbol, set(defined), 10)

    kfiles = [gitfile for gitfile in get_files()
              if REGEX_FILE_KCONFIG.match(gitfile)]
    arglist = [(part, ignore) for part in partition(kfiles, cpu_count())]

    known = set()
    # the context manager tears the workers down once the map is done
    with Pool(cpu_count(), init_worker) as pool:
        for res in pool.map(parse_kconfig_files, arglist):
            known.update(res[0])

    return difflib.get_close_matches(symbol, known, 10)


def get_files():
    """Return a list of all files in the current git directory.

    Paths containing '.git', 'ChangeLog' or '.log', directories, and anything
    below 'tools/' are left out.
    """
    # use 'git ls-files' to get the worklist
    stdout = execute(["git", "ls-files"])

    files = []
    for gitfile in stdout.split("\n"):
        if not gitfile:
            # trailing newline or empty output
            continue
        if ".git" in gitfile or "ChangeLog" in gitfile or \
                ".log" in gitfile or os.path.isdir(gitfile) or \
                gitfile.startswith("tools/"):
            continue
        files.append(gitfile)
    return files


def check_symbols(ignore):
    """Find undefined Kconfig symbols and return a tuple of (a dict with the
    symbol as key and the set of referencing files as value, the set of
    defined symbols). Files matching %ignore are not checked for undefined
    symbols."""
    pool = Pool(cpu_count(), init_worker)
    try:
        result = check_symbols_helper(pool, ignore)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        sys.exit(1)
    # release the worker processes; this function may run more than once
    pool.close()
    pool.join()
    return result


def check_symbols_helper(pool, ignore):
    """Helper method for check_symbols(). Used to catch keyboard interrupts in
    check_symbols() in order to properly terminate running worker processes."""
    source_files = []
    kconfig_files = []
    defined_symbols = set()
    referenced_symbols = {}  # {file: [symbols]}

    for gitfile in get_files():
        if REGEX_FILE_KCONFIG.match(gitfile):
            kconfig_files.append(gitfile)
        elif not (ignore and re.match(ignore, gitfile)):
            # add source files that do not match the ignore pattern
            source_files.append(gitfile)

    # parse source files
    arglist = partition(source_files, cpu_count())
    for res in pool.map(parse_source_files, arglist):
        referenced_symbols.update(res)

    # parse kconfig files
    arglist = [(part, ignore)
               for part in partition(kconfig_files, cpu_count())]
    for res in pool.map(parse_kconfig_files, arglist):
        defined_symbols.update(res[0])
        referenced_symbols.update(res[1])

    # inverse mapping of referenced_symbols to dict(symbol: {files})
    inv_map = {}
    for _file, symbols in referenced_symbols.items():
        for symbol in symbols:
            inv_map.setdefault(symbol, set()).add(_file)
    referenced_symbols = inv_map

    undefined = {}  # {symbol: {files}}
    for symbol in sorted(referenced_symbols):
        # filter some false positives
        if symbol in ("FOO", "BAR", "FOO_BAR", "XXX"):
            continue
        if symbol in defined_symbols:
            continue
        # avoid false positives for kernel modules
        if symbol.endswith("_MODULE") and \
                symbol[:-len("_MODULE")] in defined_symbols:
            continue
        undefined[symbol] = referenced_symbols.get(symbol)
    return undefined, defined_symbols


def parse_source_files(source_files):
    """Parse each source file in @source_files and return dictionary with source
    files as keys and lists of references Kconfig symbols as values."""
    return {sfile: parse_source_file(sfile) for sfile in source_files}


def parse_source_file(sfile):
    """Parse @sfile and return a list of referenced Kconfig symbols.

    A missing file yields an empty list.  Matches that do not end in an
    alphanumeric character are dropped.
    """
    references = []

    if not os.path.exists(sfile):
        return references

    with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
        for line in stream:
            if "CONFIG_" not in line:
                continue
            for symbol in REGEX_SOURCE_SYMBOL.findall(line):
                if REGEX_FILTER_SYMBOLS.search(symbol):
                    references.append(symbol)

    return references


def get_symbols_in_line(line):
    """Return mentioned Kconfig symbols in @line."""
    return REGEX_SYMBOL.findall(line)


def parse_kconfig_files(args):
    """Parse kconfig files and return tuple of defined and references Kconfig
    symbols. Note, @args is a tuple of a list of files and the @ignore
    pattern."""
    kconfig_files, ignore = args[0], args[1]
    defined_symbols = []
    referenced_symbols = {}

    for kfile in kconfig_files:
        defined, references = parse_kconfig_file(kfile)
        defined_symbols.extend(defined)
        # do not collect references for files that match the ignore pattern
        if not (ignore and re.match(ignore, kfile)):
            referenced_symbols[kfile] = references
    return (defined_symbols, referenced_symbols)


def parse_kconfig_file(kfile):
    """Parse @kfile and return a tuple (defined symbols, referenced symbols).

    A missing file yields two empty lists.  Text after '#' is ignored on the
    line that starts a statement, and quoted strings are removed from it
    before symbols are extracted.  Purely numeric tokens are not reported as
    references.
    """
    defined = []
    references = []

    if not os.path.exists(kfile):
        return defined, references

    with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
        lines = stream.readlines()

    total = len(lines)
    for index, raw in enumerate(lines):
        line = raw.strip('\n').split("#")[0]  # ignore comments

        definition = REGEX_KCONFIG_DEF.match(line)
        if definition:
            defined.append(definition.group(1))
        elif REGEX_KCONFIG_STMT.match(line):
            line = REGEX_QUOTES.sub("", line)
            symbols = get_symbols_in_line(line)
            # multi-line statements: follow backslash continuations, but never
            # read past the end of the file (a dangling '\' on the last line
            # simply ends the statement)
            nxt = index + 1
            while line.endswith("\\") and nxt < total:
                line = lines[nxt].strip('\n')
                symbols.extend(get_symbols_in_line(line))
                nxt += 1
            for symbol in set(symbols):
                if REGEX_NUMERIC.match(symbol):
                    # ignore numeric values
                    continue
                references.append(symbol)

    return defined, references


if __name__ == "__main__":
    main()