1#!/usr/bin/env python2 2 3"""Find Kconfig symbols that are referenced but not defined.""" 4 5# (c) 2014-2015 Valentin Rothberg <valentinrothberg@gmail.com> 6# (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de> 7# 8# Licensed under the terms of the GNU GPL License version 2 9 10 11import difflib 12import os 13import re 14import signal 15import sys 16from multiprocessing import Pool, cpu_count 17from optparse import OptionParser 18from subprocess import Popen, PIPE, STDOUT 19 20 21# regex expressions 22OPERATORS = r"&|\(|\)|\||\!" 23FEATURE = r"(?:\w*[A-Z0-9]\w*){2,}" 24DEF = r"^\s*(?:menu){,1}config\s+(" + FEATURE + r")\s*" 25EXPR = r"(?:" + OPERATORS + r"|\s|" + FEATURE + r")+" 26DEFAULT = r"default\s+.*?(?:if\s.+){,1}" 27STMT = r"^\s*(?:if|select|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR 28SOURCE_FEATURE = r"(?:\W|\b)+[D]{,1}CONFIG_(" + FEATURE + r")" 29 30# regex objects 31REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$") 32REGEX_FEATURE = re.compile(r'(?!\B)' + FEATURE + r'(?!\B)') 33REGEX_SOURCE_FEATURE = re.compile(SOURCE_FEATURE) 34REGEX_KCONFIG_DEF = re.compile(DEF) 35REGEX_KCONFIG_EXPR = re.compile(EXPR) 36REGEX_KCONFIG_STMT = re.compile(STMT) 37REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$") 38REGEX_FILTER_FEATURES = re.compile(r"[A-Za-z0-9]$") 39REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+") 40REGEX_QUOTES = re.compile("(\"(.*?)\")") 41 42 43def parse_options(): 44 """The user interface of this module.""" 45 usage = "%prog [options]\n\n" \ 46 "Run this tool to detect Kconfig symbols that are referenced but " \ 47 "not defined in\nKconfig. The output of this tool has the " \ 48 "format \'Undefined symbol\\tFile list\'\n\n" \ 49 "If no option is specified, %prog will default to check your\n" \ 50 "current tree. Please note that specifying commits will " \ 51 "\'git reset --hard\'\nyour current tree! You may save " \ 52 "uncommitted changes to avoid losing data." 53 54 parser = OptionParser(usage=usage) 55 56 parser.add_option('-c', '--commit', dest='commit', action='store', 57 default="", 58 help="Check if the specified commit (hash) introduces " 59 "undefined Kconfig symbols.") 60 61 parser.add_option('-d', '--diff', dest='diff', action='store', 62 default="", 63 help="Diff undefined symbols between two commits. The " 64 "input format bases on Git log's " 65 "\'commmit1..commit2\'.") 66 67 parser.add_option('-f', '--find', dest='find', action='store_true', 68 default=False, 69 help="Find and show commits that may cause symbols to be " 70 "missing. Required to run with --diff.") 71 72 parser.add_option('-i', '--ignore', dest='ignore', action='store', 73 default="", 74 help="Ignore files matching this pattern. Note that " 75 "the pattern needs to be a Python regex. To " 76 "ignore defconfigs, specify -i '.*defconfig'.") 77 78 parser.add_option('-s', '--sim', dest='sim', action='store', default="", 79 help="Print a list of maximum 10 string-similar symbols.") 80 81 parser.add_option('', '--force', dest='force', action='store_true', 82 default=False, 83 help="Reset current Git tree even when it's dirty.") 84 85 (opts, _) = parser.parse_args() 86 87 if opts.commit and opts.diff: 88 sys.exit("Please specify only one option at once.") 89 90 if opts.diff and not re.match(r"^[\w\-\.]+\.\.[\w\-\.]+$", opts.diff): 91 sys.exit("Please specify valid input in the following format: " 92 "\'commit1..commit2\'") 93 94 if opts.commit or opts.diff: 95 if not opts.force and tree_is_dirty(): 96 sys.exit("The current Git tree is dirty (see 'git status'). " 97 "Running this script may\ndelete important data since it " 98 "calls 'git reset --hard' for some performance\nreasons. " 99 " Please run this script in a clean Git tree or pass " 100 "'--force' if you\nwant to ignore this warning and " 101 "continue.") 102 103 if opts.commit: 104 opts.find = False 105 106 if opts.ignore: 107 try: 108 re.match(opts.ignore, "this/is/just/a/test.c") 109 except: 110 sys.exit("Please specify a valid Python regex.") 111 112 return opts 113 114 115def main(): 116 """Main function of this module.""" 117 opts = parse_options() 118 119 if opts.sim and not opts.commit and not opts.diff: 120 sims = find_sims(opts.sim, opts.ignore) 121 if sims: 122 print "%s: %s" % (yel("Similar symbols"), ', '.join(sims)) 123 else: 124 print "%s: no similar symbols found" % yel("Similar symbols") 125 sys.exit(0) 126 127 # dictionary of (un)defined symbols 128 defined = {} 129 undefined = {} 130 131 if opts.commit or opts.diff: 132 head = get_head() 133 134 # get commit range 135 commit_a = None 136 commit_b = None 137 if opts.commit: 138 commit_a = opts.commit + "~" 139 commit_b = opts.commit 140 elif opts.diff: 141 split = opts.diff.split("..") 142 commit_a = split[0] 143 commit_b = split[1] 144 undefined_a = {} 145 undefined_b = {} 146 147 # get undefined items before the commit 148 execute("git reset --hard %s" % commit_a) 149 undefined_a, _ = check_symbols(opts.ignore) 150 151 # get undefined items for the commit 152 execute("git reset --hard %s" % commit_b) 153 undefined_b, defined = check_symbols(opts.ignore) 154 155 # report cases that are present for the commit but not before 156 for feature in sorted(undefined_b): 157 # feature has not been undefined before 158 if not feature in undefined_a: 159 files = sorted(undefined_b.get(feature)) 160 undefined[feature] = files 161 # check if there are new files that reference the undefined feature 162 else: 163 files = sorted(undefined_b.get(feature) - 164 undefined_a.get(feature)) 165 if files: 166 undefined[feature] = files 167 168 # reset to head 169 execute("git reset --hard %s" % head) 170 171 # default to check the entire tree 172 else: 173 undefined, defined = check_symbols(opts.ignore) 174 175 # now print the output 176 for feature in sorted(undefined): 177 print red(feature) 178 179 files = sorted(undefined.get(feature)) 180 print "%s: %s" % (yel("Referencing files"), ", ".join(files)) 181 182 sims = find_sims(feature, opts.ignore, defined) 183 sims_out = yel("Similar symbols") 184 if sims: 185 print "%s: %s" % (sims_out, ', '.join(sims)) 186 else: 187 print "%s: %s" % (sims_out, "no similar symbols found") 188 189 if opts.find: 190 print "%s:" % yel("Commits changing symbol") 191 commits = find_commits(feature, opts.diff) 192 if commits: 193 for commit in commits: 194 commit = commit.split(" ", 1) 195 print "\t- %s (\"%s\")" % (yel(commit[0]), commit[1]) 196 else: 197 print "\t- no commit found" 198 print # new line 199 200 201def yel(string): 202 """ 203 Color %string yellow. 204 """ 205 return "\033[33m%s\033[0m" % string 206 207 208def red(string): 209 """ 210 Color %string red. 211 """ 212 return "\033[31m%s\033[0m" % string 213 214 215def execute(cmd): 216 """Execute %cmd and return stdout. Exit in case of error.""" 217 pop = Popen(cmd, stdout=PIPE, stderr=STDOUT, shell=True) 218 (stdout, _) = pop.communicate() # wait until finished 219 if pop.returncode != 0: 220 sys.exit(stdout) 221 return stdout 222 223 224def find_commits(symbol, diff): 225 """Find commits changing %symbol in the given range of %diff.""" 226 commits = execute("git log --pretty=oneline --abbrev-commit -G %s %s" 227 % (symbol, diff)) 228 return [x for x in commits.split("\n") if x] 229 230 231def tree_is_dirty(): 232 """Return true if the current working tree is dirty (i.e., if any file has 233 been added, deleted, modified, renamed or copied but not committed).""" 234 stdout = execute("git status --porcelain") 235 for line in stdout: 236 if re.findall(r"[URMADC]{1}", line[:2]): 237 return True 238 return False 239 240 241def get_head(): 242 """Return commit hash of current HEAD.""" 243 stdout = execute("git rev-parse HEAD") 244 return stdout.strip('\n') 245 246 247def partition(lst, size): 248 """Partition list @lst into eveni-sized lists of size @size.""" 249 return [lst[i::size] for i in xrange(size)] 250 251 252def init_worker(): 253 """Set signal handler to ignore SIGINT.""" 254 signal.signal(signal.SIGINT, signal.SIG_IGN) 255 256 257def find_sims(symbol, ignore, defined = []): 258 """Return a list of max. ten Kconfig symbols that are string-similar to 259 @symbol.""" 260 if defined: 261 return sorted(difflib.get_close_matches(symbol, set(defined), 10)) 262 263 pool = Pool(cpu_count(), init_worker) 264 kfiles = [] 265 for gitfile in get_files(): 266 if REGEX_FILE_KCONFIG.match(gitfile): 267 kfiles.append(gitfile) 268 269 arglist = [] 270 for part in partition(kfiles, cpu_count()): 271 arglist.append((part, ignore)) 272 273 for res in pool.map(parse_kconfig_files, arglist): 274 defined.extend(res[0]) 275 276 return sorted(difflib.get_close_matches(symbol, set(defined), 10)) 277 278 279def get_files(): 280 """Return a list of all files in the current git directory.""" 281 # use 'git ls-files' to get the worklist 282 stdout = execute("git ls-files") 283 if len(stdout) > 0 and stdout[-1] == "\n": 284 stdout = stdout[:-1] 285 286 files = [] 287 for gitfile in stdout.rsplit("\n"): 288 if ".git" in gitfile or "ChangeLog" in gitfile or \ 289 ".log" in gitfile or os.path.isdir(gitfile) or \ 290 gitfile.startswith("tools/"): 291 continue 292 files.append(gitfile) 293 return files 294 295 296def check_symbols(ignore): 297 """Find undefined Kconfig symbols and return a dict with the symbol as key 298 and a list of referencing files as value. Files matching %ignore are not 299 checked for undefined symbols.""" 300 pool = Pool(cpu_count(), init_worker) 301 try: 302 return check_symbols_helper(pool, ignore) 303 except KeyboardInterrupt: 304 pool.terminate() 305 pool.join() 306 sys.exit(1) 307 308 309def check_symbols_helper(pool, ignore): 310 """Helper method for check_symbols(). Used to catch keyboard interrupts in 311 check_symbols() in order to properly terminate running worker processes.""" 312 source_files = [] 313 kconfig_files = [] 314 defined_features = [] 315 referenced_features = dict() # {file: [features]} 316 317 for gitfile in get_files(): 318 if REGEX_FILE_KCONFIG.match(gitfile): 319 kconfig_files.append(gitfile) 320 else: 321 if ignore and not re.match(ignore, gitfile): 322 continue 323 # add source files that do not match the ignore pattern 324 source_files.append(gitfile) 325 326 # parse source files 327 arglist = partition(source_files, cpu_count()) 328 for res in pool.map(parse_source_files, arglist): 329 referenced_features.update(res) 330 331 332 # parse kconfig files 333 arglist = [] 334 for part in partition(kconfig_files, cpu_count()): 335 arglist.append((part, ignore)) 336 for res in pool.map(parse_kconfig_files, arglist): 337 defined_features.extend(res[0]) 338 referenced_features.update(res[1]) 339 defined_features = set(defined_features) 340 341 # inverse mapping of referenced_features to dict(feature: [files]) 342 inv_map = dict() 343 for _file, features in referenced_features.iteritems(): 344 for feature in features: 345 inv_map[feature] = inv_map.get(feature, set()) 346 inv_map[feature].add(_file) 347 referenced_features = inv_map 348 349 undefined = {} # {feature: [files]} 350 for feature in sorted(referenced_features): 351 # filter some false positives 352 if feature == "FOO" or feature == "BAR" or \ 353 feature == "FOO_BAR" or feature == "XXX": 354 continue 355 if feature not in defined_features: 356 if feature.endswith("_MODULE"): 357 # avoid false positives for kernel modules 358 if feature[:-len("_MODULE")] in defined_features: 359 continue 360 undefined[feature] = referenced_features.get(feature) 361 return undefined, defined_features 362 363 364def parse_source_files(source_files): 365 """Parse each source file in @source_files and return dictionary with source 366 files as keys and lists of references Kconfig symbols as values.""" 367 referenced_features = dict() 368 for sfile in source_files: 369 referenced_features[sfile] = parse_source_file(sfile) 370 return referenced_features 371 372 373def parse_source_file(sfile): 374 """Parse @sfile and return a list of referenced Kconfig features.""" 375 lines = [] 376 references = [] 377 378 if not os.path.exists(sfile): 379 return references 380 381 with open(sfile, "r") as stream: 382 lines = stream.readlines() 383 384 for line in lines: 385 if not "CONFIG_" in line: 386 continue 387 features = REGEX_SOURCE_FEATURE.findall(line) 388 for feature in features: 389 if not REGEX_FILTER_FEATURES.search(feature): 390 continue 391 references.append(feature) 392 393 return references 394 395 396def get_features_in_line(line): 397 """Return mentioned Kconfig features in @line.""" 398 return REGEX_FEATURE.findall(line) 399 400 401def parse_kconfig_files(args): 402 """Parse kconfig files and return tuple of defined and references Kconfig 403 symbols. Note, @args is a tuple of a list of files and the @ignore 404 pattern.""" 405 kconfig_files = args[0] 406 ignore = args[1] 407 defined_features = [] 408 referenced_features = dict() 409 410 for kfile in kconfig_files: 411 defined, references = parse_kconfig_file(kfile) 412 defined_features.extend(defined) 413 if ignore and re.match(ignore, kfile): 414 # do not collect references for files that match the ignore pattern 415 continue 416 referenced_features[kfile] = references 417 return (defined_features, referenced_features) 418 419 420def parse_kconfig_file(kfile): 421 """Parse @kfile and update feature definitions and references.""" 422 lines = [] 423 defined = [] 424 references = [] 425 skip = False 426 427 if not os.path.exists(kfile): 428 return defined, references 429 430 with open(kfile, "r") as stream: 431 lines = stream.readlines() 432 433 for i in range(len(lines)): 434 line = lines[i] 435 line = line.strip('\n') 436 line = line.split("#")[0] # ignore comments 437 438 if REGEX_KCONFIG_DEF.match(line): 439 feature_def = REGEX_KCONFIG_DEF.findall(line) 440 defined.append(feature_def[0]) 441 skip = False 442 elif REGEX_KCONFIG_HELP.match(line): 443 skip = True 444 elif skip: 445 # ignore content of help messages 446 pass 447 elif REGEX_KCONFIG_STMT.match(line): 448 line = REGEX_QUOTES.sub("", line) 449 features = get_features_in_line(line) 450 # multi-line statements 451 while line.endswith("\\"): 452 i += 1 453 line = lines[i] 454 line = line.strip('\n') 455 features.extend(get_features_in_line(line)) 456 for feature in set(features): 457 if REGEX_NUMERIC.match(feature): 458 # ignore numeric values 459 continue 460 references.append(feature) 461 462 return defined, references 463 464 465if __name__ == "__main__": 466 main() 467