#!/usr/bin/env python3

"""Find Kconfig symbols that are referenced but not defined."""

# (c) 2014-2016 Valentin Rothberg <valentinrothberg@gmail.com>
# (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
#
# Licensed under the terms of the GNU GPL License version 2


import argparse
import difflib
import os
import re
import signal
import subprocess
import sys
from multiprocessing import Pool, cpu_count


# regex expressions
OPERATORS = r"&|\(|\)|\||\!"
SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
DEF = r"^\s*(?:menu){,1}config\s+(" + SYMBOL + r")\s*"
EXPR = r"(?:" + OPERATORS + r"|\s|" + SYMBOL + r")+"
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
STMT = r"^\s*(?:if|select|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
SOURCE_SYMBOL = r"(?:\W|\b)+[D]{,1}CONFIG_(" + SYMBOL + r")"

# regex objects
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
REGEX_SYMBOL = re.compile(r'(?!\B)' + SYMBOL + r'(?!\B)')
REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
REGEX_KCONFIG_DEF = re.compile(DEF)
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$")
REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
REGEX_QUOTES = re.compile("(\"(.*?)\")")

# Whether yel()/red() emit ANSI escapes.  main() overwrites this from the
# command line; the default keeps the helpers usable before main() has run.
COLOR = False


def parse_options():
    """Parse and validate the command line.

    Exits the process with a message when the options are inconsistent, when
    --diff is not of the form 'commit1..commit2', when the Git tree is dirty
    and --force was not given, or when --ignore is not a valid regex.

    Returns the argparse namespace.
    """
    usage = "Run this tool to detect Kconfig symbols that are referenced but " \
            "not defined in Kconfig. If no option is specified, " \
            "checkkconfigsymbols defaults to check your current tree. " \
            "Please note that specifying commits will 'git reset --hard\' " \
            "your current tree! You may save uncommitted changes to avoid " \
            "losing data."

    parser = argparse.ArgumentParser(description=usage)

    parser.add_argument('-c', '--commit', dest='commit', action='store',
                        default="",
                        help="check if the specified commit (hash) introduces "
                             "undefined Kconfig symbols")

    parser.add_argument('-d', '--diff', dest='diff', action='store',
                        default="",
                        help="diff undefined symbols between two commits "
                             "(e.g., -d commmit1..commit2)")

    parser.add_argument('-f', '--find', dest='find', action='store_true',
                        default=False,
                        help="find and show commits that may cause symbols to be "
                             "missing (required to run with --diff)")

    parser.add_argument('-i', '--ignore', dest='ignore', action='store',
                        default="",
                        help="ignore files matching this Python regex "
                             "(e.g., -i '.*defconfig')")

    parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
                        help="print a list of max. 10 string-similar symbols")

    parser.add_argument('--force', dest='force', action='store_true',
                        default=False,
                        help="reset current Git tree even when it's dirty")

    parser.add_argument('--no-color', dest='color', action='store_false',
                        default=True,
                        help="don't print colored output (default when not "
                             "outputting to a terminal)")

    args = parser.parse_args()

    if args.commit and args.diff:
        sys.exit("Please specify only one option at once.")

    if args.diff and not re.match(r"^[\w\-\.]+\.\.[\w\-\.]+$", args.diff):
        sys.exit("Please specify valid input in the following format: "
                 "\'commit1..commit2\'")

    if args.commit or args.diff:
        if not args.force and tree_is_dirty():
            sys.exit("The current Git tree is dirty (see 'git status'). "
                     "Running this script may\ndelete important data since it "
                     "calls 'git reset --hard' for some performance\nreasons. "
                     " Please run this script in a clean Git tree or pass "
                     "'--force' if you\nwant to ignore this warning and "
                     "continue.")

    if args.commit:
        args.find = False

    if args.ignore:
        # Only a malformed pattern should be reported as such; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        try:
            re.compile(args.ignore)
        except re.error:
            sys.exit("Please specify a valid Python regex.")

    return args


def main():
    """Run the check selected on the command line and print the report."""
    args = parse_options()

    global COLOR
    COLOR = args.color and sys.stdout.isatty()

    if args.sim and not args.commit and not args.diff:
        sims = find_sims(args.sim, args.ignore)
        if sims:
            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
        else:
            print("%s: no similar symbols found" % yel("Similar symbols"))
        sys.exit(0)

    # dictionary of (un)defined symbols
    defined = {}
    undefined = {}

    if args.commit or args.diff:
        head = get_head()

        # get commit range
        commit_a = None
        commit_b = None
        if args.commit:
            commit_a = args.commit + "~"
            commit_b = args.commit
        elif args.diff:
            split = args.diff.split("..")
            commit_a = split[0]
            commit_b = split[1]

        # get undefined items before the commit
        reset(commit_a)
        undefined_a, _ = check_symbols(args.ignore)

        # get undefined items for the commit
        reset(commit_b)
        undefined_b, defined = check_symbols(args.ignore)

        # report cases that are present for the commit but not before
        for symbol in sorted(undefined_b):
            if symbol not in undefined_a:
                # symbol has not been undefined before
                undefined[symbol] = sorted(undefined_b.get(symbol))
            else:
                # check if there are new files that reference the undefined
                # symbol
                files = sorted(undefined_b.get(symbol) -
                               undefined_a.get(symbol))
                if files:
                    undefined[symbol] = files

        # reset to head
        reset(head)

    # default to check the entire tree
    else:
        undefined, defined = check_symbols(args.ignore)

    # now print the output
    for symbol in sorted(undefined):
        print(red(symbol))

        files = sorted(undefined.get(symbol))
        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))

        sims = find_sims(symbol, args.ignore, defined)
        sims_out = yel("Similar symbols")
        if sims:
            print("%s: %s" % (sims_out, ', '.join(sims)))
        else:
            print("%s: %s" % (sims_out, "no similar symbols found"))

        if args.find:
            print("%s:" % yel("Commits changing symbol"))
            commits = find_commits(symbol, args.diff)
            if commits:
                for commit in commits:
                    commit = commit.split(" ", 1)
                    print("\t- %s (\"%s\")" % (yel(commit[0]), commit[1]))
            else:
                print("\t- no commit found")
        print()  # new line


def reset(commit):
    """Hard-reset the current git tree to commit."""
    execute(["git", "reset", "--hard", commit])


def yel(string):
    """Return string wrapped in yellow ANSI escapes when COLOR is set."""
    return "\033[33m%s\033[0m" % string if COLOR else string


def red(string):
    """Return string wrapped in red ANSI escapes when COLOR is set."""
    return "\033[31m%s\033[0m" % string if COLOR else string


def execute(cmd):
    """Run cmd (an argument list) and return its decoded stdout+stderr.

    Exits the process if the command returns a non-zero status.
    """
    try:
        stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                         shell=False)
        stdout = stdout.decode(errors='replace')
    except subprocess.CalledProcessError as fail:
        # sys.exit() rather than the builtin exit(): the latter is only
        # installed by the site module.
        sys.exit(fail)
    return stdout


def find_commits(symbol, diff):
    """Return the one-line log entries of commits in the range diff whose
    patch text matches symbol (git log -G)."""
    commits = execute(["git", "log", "--pretty=oneline",
                       "--abbrev-commit", "-G",
                       symbol, diff])
    return [x for x in commits.split("\n") if x]


def tree_is_dirty(status=None):
    """Return True if the working tree is dirty, i.e. if any file has been
    added, deleted, modified, renamed or copied but not committed.

    status is the output of 'git status --porcelain'; when it is None the
    command is run to obtain it.
    """
    if status is None:
        status = execute(["git", "status", "--porcelain"])
    # The state is encoded in the first two columns of each *line*.  Walking
    # the string directly would yield single characters and let letters in
    # file names (e.g. the untracked '?? Makefile') count as a dirty state.
    for line in status.splitlines():
        if re.findall(r"[URMADC]{1}", line[:2]):
            return True
    return False


def get_head():
    """Return commit hash of current HEAD."""
    stdout = execute(["git", "rev-parse", "HEAD"])
    return stdout.strip('\n')


def partition(lst, size):
    """Split lst round-robin into size lists of near-equal length."""
    return [lst[i::size] for i in range(size)]


def init_worker():
    """Set signal handler to ignore SIGINT (used as Pool initializer)."""
    signal.signal(signal.SIGINT, signal.SIG_IGN)


def find_sims(symbol, ignore, defined=None):
    """Return a sorted list of max. ten Kconfig symbols that are
    string-similar to symbol.

    defined is an iterable of known symbols.  When it is missing or empty,
    the symbols are collected by parsing every Kconfig file of the tree.
    The caller's object is never modified.
    """
    if defined:
        return sorted(difflib.get_close_matches(symbol, set(defined), 10))

    kfiles = [gitfile for gitfile in get_files()
              if REGEX_FILE_KCONFIG.match(gitfile)]
    arglist = [(part, ignore) for part in partition(kfiles, cpu_count())]

    # Collect into a fresh list so repeated calls do not accumulate results
    # and a caller-supplied (possibly non-list) container is left alone.
    found = []
    with Pool(cpu_count(), init_worker) as pool:
        for res in pool.map(parse_kconfig_files, arglist):
            found.extend(res[0])

    return sorted(difflib.get_close_matches(symbol, set(found), 10))


def get_files():
    """Return a list of the files tracked by git that should be scanned."""
    # use 'git ls-files' to get the worklist
    stdout = execute(["git", "ls-files"])

    files = []
    for gitfile in stdout.split("\n"):
        if not gitfile:
            # trailing newline / empty repository
            continue
        if ".git" in gitfile or "ChangeLog" in gitfile or \
                ".log" in gitfile or os.path.isdir(gitfile) or \
                gitfile.startswith("tools/"):
            continue
        files.append(gitfile)
    return files


def check_symbols(ignore):
    """Find undefined Kconfig symbols.

    Returns a tuple (undefined, defined): undefined maps each undefined
    symbol to the set of files referencing it, defined is the set of all
    defined symbols.  Files matching ignore are not checked for undefined
    symbols.  Exits with status 1 on keyboard interrupt.
    """
    pool = Pool(cpu_count(), init_worker)
    try:
        result = check_symbols_helper(pool, ignore)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        sys.exit(1)
    # release the worker processes; main() may call us more than once
    pool.close()
    pool.join()
    return result


def check_symbols_helper(pool, ignore):
    """Helper for check_symbols() doing the actual work on pool.

    Kept separate so that check_symbols() can catch keyboard interrupts and
    properly terminate running worker processes.
    """
    source_files = []
    kconfig_files = []
    defined_symbols = []
    referenced_symbols = dict()  # {file: [symbols]}

    for gitfile in get_files():
        if REGEX_FILE_KCONFIG.match(gitfile):
            kconfig_files.append(gitfile)
        else:
            # skip source files that match the ignore pattern
            if ignore and re.match(ignore, gitfile):
                continue
            source_files.append(gitfile)

    # parse source files
    arglist = partition(source_files, cpu_count())
    for res in pool.map(parse_source_files, arglist):
        referenced_symbols.update(res)

    # parse kconfig files
    arglist = [(part, ignore)
               for part in partition(kconfig_files, cpu_count())]
    for res in pool.map(parse_kconfig_files, arglist):
        defined_symbols.extend(res[0])
        referenced_symbols.update(res[1])
    defined_symbols = set(defined_symbols)

    # inverse mapping of referenced_symbols to dict(symbol: {files})
    inv_map = dict()
    for _file, symbols in referenced_symbols.items():
        for symbol in symbols:
            inv_map.setdefault(symbol, set()).add(_file)
    referenced_symbols = inv_map

    undefined = {}  # {symbol: {files}}
    for symbol in sorted(referenced_symbols):
        # filter some false positives
        if symbol in ("FOO", "BAR", "FOO_BAR", "XXX"):
            continue
        if symbol not in defined_symbols:
            if symbol.endswith("_MODULE"):
                # avoid false positives for kernel modules
                if symbol[:-len("_MODULE")] in defined_symbols:
                    continue
            undefined[symbol] = referenced_symbols.get(symbol)
    return undefined, defined_symbols


def parse_source_files(source_files):
    """Parse each file in source_files and return a dictionary mapping the
    file name to the list of Kconfig symbols it references."""
    return {sfile: parse_source_file(sfile) for sfile in source_files}


def parse_source_file(sfile):
    """Parse sfile and return a list of referenced Kconfig symbols.

    Returns an empty list when the file does not exist.
    """
    references = []

    if not os.path.exists(sfile):
        return references

    with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
        lines = stream.readlines()

    for line in lines:
        if "CONFIG_" not in line:
            continue
        for symbol in REGEX_SOURCE_SYMBOL.findall(line):
            # drop matches that do not end in an alphanumeric character
            if not REGEX_FILTER_SYMBOLS.search(symbol):
                continue
            references.append(symbol)

    return references


def get_symbols_in_line(line):
    """Return mentioned Kconfig symbols in line."""
    return REGEX_SYMBOL.findall(line)


def parse_kconfig_files(args):
    """Parse Kconfig files and return a tuple (defined, referenced).

    args is a tuple of a list of files and the ignore pattern (a single
    argument so the function can be used with Pool.map).  defined is a list
    of defined symbols, referenced maps file name to referenced symbols.
    """
    kconfig_files = args[0]
    ignore = args[1]
    defined_symbols = []
    referenced_symbols = dict()

    for kfile in kconfig_files:
        defined, references = parse_kconfig_file(kfile)
        defined_symbols.extend(defined)
        if ignore and re.match(ignore, kfile):
            # do not collect references for files that match the ignore pattern
            continue
        referenced_symbols[kfile] = references
    return (defined_symbols, referenced_symbols)


def parse_kconfig_file(kfile):
    """Parse kfile and return a tuple (defined, references) of symbol lists.

    Returns two empty lists when the file does not exist.
    """
    defined = []
    references = []
    skip = False

    if not os.path.exists(kfile):
        return defined, references

    with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
        lines = stream.readlines()

    for i, line in enumerate(lines):
        line = line.strip('\n')
        line = line.split("#")[0]  # ignore comments

        if REGEX_KCONFIG_DEF.match(line):
            symbol_def = REGEX_KCONFIG_DEF.findall(line)
            defined.append(symbol_def[0])
            skip = False
        elif REGEX_KCONFIG_HELP.match(line):
            skip = True
        elif skip:
            # ignore content of help messages
            pass
        elif REGEX_KCONFIG_STMT.match(line):
            line = REGEX_QUOTES.sub("", line)
            symbols = get_symbols_in_line(line)
            # multi-line statements; stop at end of file so a dangling
            # trailing backslash cannot index past the last line
            nxt = i
            while line.endswith("\\") and nxt + 1 < len(lines):
                nxt += 1
                line = lines[nxt].strip('\n')
                symbols.extend(get_symbols_in_line(line))
            for symbol in sorted(set(symbols)):
                if REGEX_NUMERIC.match(symbol):
                    # ignore numeric values
                    continue
                references.append(symbol)

    return defined, references


if __name__ == "__main__":
    main()