# Copyright (C) 2020 Red Hat Inc.
#
# Authors:
#  Eduardo Habkost <ehabkost@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2.  See
# the COPYING file in the top-level directory.
"""Helpers for creation of regular expressions"""
import logging
import re
from typing import Optional

logger = logging.getLogger(__name__)
DBG = logger.debug
INFO = logger.info
WARN = logger.warning


def S(*regexps: str) -> str:
    """Concatenate regexp fragments in order and return the result.

    >>> S('a', 'b', 'c')
    'abc'
    """
    return ''.join(regexps)


def P(*regexps: str, name: Optional[str] = None, capture: bool = False,
      repeat: str = '') -> str:
    """Wrap the concatenated regexp(s) in a group.

    The group is a named group if `name` is given, otherwise a capturing
    group if `capture` is true, otherwise a non-capturing group.  `repeat`
    is appended verbatim after the closing parenthesis (e.g. '*', '+').

    >>> P('a', 'b')
    '(?:ab)'
    >>> P('a', capture=True, repeat='+')
    '(a)+'
    >>> P('a', name='x')
    '(?P<x>a)'
    """
    s = S(*regexps)
    if name:
        return f'(?P<{name}>{s}){repeat}'
    if capture:
        return f'({s}){repeat}'
    return f'(?:{s}){repeat}'


def NAMED(name: str, *regexps: str) -> str:
    """Make named group using (?P<name>...) syntax

    >>> NAMED('mygroup', 'xyz', 'abc')
    '(?P<mygroup>xyzabc)'
    """
    return P(*regexps, name=name)


def OR(*regexps: str, **kwargs) -> str:
    """Build (a|b|c) regexp; keyword arguments are forwarded to P()

    >>> OR('a', 'b', 'c')
    '(?:a|b|c)'
    """
    return P('|'.join(regexps), **kwargs)


def M(*regexps: str, n: str = '*', name: Optional[str] = None) -> str:
    """Add repetition qualifier to regexp(s)

    The fragments are concatenated inside a non-capturing group that is
    followed by `n`.  If `name` is given, the whole repetition is wrapped
    in a named group.

    >>> M('a', 'b')
    '(?:ab)*'
    >>> M('a' , 'b', n='+')
    '(?:ab)+'
    >>> M('a' , 'b', n='{2,3}', name='name')
    '(?P<name>(?:ab){2,3})'
    """
    r = P(*regexps, repeat=n)
    if name:
        r = NAMED(name, r)
    return r


def OPTIONAL_PARS(R: str) -> str:
    """Match regexp `R` either bare or surrounded by one pair of parentheses.

    Whitespace is allowed between the parentheses and `R`.
    """
    return OR(S(r'\(\s*', R, r'\s*\)'), R)


def test_optional_pars():
    """Check that OPTIONAL_PARS accepts balanced or absent parentheses only."""
    r = OPTIONAL_PARS('abc')+'$'
    assert re.match(r, 'abc')
    assert re.match(r, '(abc)')
    assert not re.match(r, '(abcd)')
    assert not re.match(r, '(abc')
    assert not re.match(r, 'abc)')


# this disables the MULTILINE flag, so it will match at the
# beginning of the file:
RE_FILE_BEGIN = r'(?-m:^)'

# C primitives:

SP = r'\s*'

# A '//' comment up to end of line, or a '/* ... */' comment.  The block
# comment branch accepts runs of '*' inside the comment and right before
# the closing '/', so '/***/' matches and '/* a **/' ends at the first '*/'.
# This is a bare alternation: wrap it with P() before concatenating.
RE_COMMENT = r'//[^\n]*$|/\*([^*]|\*+[^*/])*\*+/'
# RE_COMMENT is grouped before SP is appended so that the trailing
# whitespace applies to both kinds of comment, not only to block comments.
RE_COMMENTS = M(P(RE_COMMENT), SP)

# NOTE(review): the lookahead rejects a following letter or digit but not
# '_', so backtracking may end the identifier right before an underscore.
# Confirm whether that is intentional before changing it.
RE_IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_]*(?![a-zA-Z0-9])'
RE_STRING = r'\"([^\"\\]|\\[a-z\"])*\"'
# The hexadecimal branch comes first: otherwise the decimal branch would
# match just the leading '0' of '0x...' when used on its own.
# This is a bare alternation: wrap it with OR()/P() before concatenating.
RE_NUMBER = r'0x[0-9a-fA-F]+|[0-9]+'

# space or escaped newlines:
CPP_SPACE = OR(r'\s', r'\\\n', repeat='+')

RE_PATH = '[a-zA-Z0-9/_.-]+'

RE_INCLUDEPATH = OR(S(r'\"', RE_PATH, r'\"'),
                    S(r'<', RE_PATH, r'>'))

RE_INCLUDE = S(r'^[ \t]*#[ \t]*include[ \t]+',
               NAMED('includepath', RE_INCLUDEPATH), r'[ \t]*\n')
RE_SIMPLEDEFINE = S(r'^[ \t]*#[ \t]*define[ \t]+', RE_IDENTIFIER, r'[ \t]*\n')

RE_STRUCT_TYPE = S(r'struct\s+', RE_IDENTIFIER)
RE_TYPE = OR(RE_IDENTIFIER, RE_STRUCT_TYPE)

# two or more identifiers/string literals in a row
RE_MACRO_CONCAT = M(S(OR(RE_IDENTIFIER, RE_STRING), SP), n='{2,}')

RE_SIMPLE_VALUE = OR(RE_IDENTIFIER, RE_STRING, RE_NUMBER)

RE_FUN_CALL = S(RE_IDENTIFIER, r'\s*\(\s*', RE_SIMPLE_VALUE, r'\s*\)')
RE_SIZEOF = S(r'sizeof\s*\(\s*', NAMED('sizeoftype', RE_TYPE), r'\s*\)')

RE_ADDRESS = S(r'&\s*', RE_IDENTIFIER)

RE_ARRAY_ITEM = S(r'{\s*', NAMED('arrayitem', M(RE_SIMPLE_VALUE, n='?')),
                  r'\s*}\s*,?')
RE_ARRAY_CAST = S(r'\(\s*', RE_IDENTIFIER, r'\s*\[\s*\]\)')
RE_ARRAY_ITEMS = M(S(RE_ARRAY_ITEM, SP))
RE_ARRAY = S(M(RE_ARRAY_CAST, n='?'), r'\s*{\s*',
             NAMED('arrayitems', RE_ARRAY_ITEMS),
             r'}')

# NOTE: this covers a very small subset of valid expressions

RE_EXPRESSION = OR(RE_SIZEOF, RE_FUN_CALL, RE_MACRO_CONCAT, RE_SIMPLE_VALUE,
                   RE_ARRAY, RE_ADDRESS)