Merge remote-tracking branch 'remotes/stsquad/tags/pull-testing-gdbstub-docs-080221...
[qemu/ar7.git] / scripts / codeconverter / codeconverter / regexps.py
blob77993cc3b9759d1d35495ff4f212f0dd2fb44e4d
1 # Copyright (C) 2020 Red Hat Inc.
3 # Authors:
4 # Eduardo Habkost <ehabkost@redhat.com>
6 # This work is licensed under the terms of the GNU GPL, version 2. See
7 # the COPYING file in the top-level directory.
8 """Helpers for creation of regular expressions"""
9 import re
11 import logging
# Module-level logger, plus short aliases for the logging calls used
# throughout the converter
logger = logging.getLogger(__name__)
DBG, INFO, WARN = logger.debug, logger.info, logger.warning
def S(*regexps) -> str:
    """Concatenate regexp fragments, in order, into a single regexp

    >>> S('a', 'b+', 'c')
    'ab+c'
    """
    glue = ''
    return glue.join(regexps)
def P(*regexps, name=None, capture=False, repeat='') -> str:
    """Wrap the concatenated regexp(s) in a group

    The group is a named group if `name` is given, otherwise a plain
    capturing group if `capture` is true, otherwise a non-capturing
    group.  `repeat` is appended verbatim after the closing parenthesis.

    >>> P('a', 'b')
    '(?:ab)'
    >>> P('a', 'b', capture=True, repeat='+')
    '(ab)+'
    >>> P('a', 'b', name='x')
    '(?P<x>ab)'
    """
    body = ''.join(regexps)
    # `name` takes precedence over `capture`
    if name:
        opening = f'(?P<{name}>'
    elif capture:
        opening = '('
    else:
        opening = '(?:'
    return f'{opening}{body}){repeat}'
def NAMED(name, *regexps) -> str:
    """Wrap regexp(s) in a named group, using the (?P<name>...) syntax

    >>> NAMED('mygroup', 'xyz', 'abc')
    '(?P<mygroup>xyzabc)'
    """
    named_group = P(*regexps, name=name)
    return named_group
def OR(*regexps, **kwargs) -> str:
    """Build an alternation group that matches any one of the regexps

    Keyword arguments are passed through to P(), so the group is
    non-capturing unless `name` or `capture` is given.

    >>> OR('a', 'b', 'c')
    '(?:a|b|c)'
    """
    alternatives = '|'.join(regexps)
    return P(alternatives, **kwargs)
def M(*regexps, n='*', name=None) -> str:
    """Group regexp(s) and apply repetition qualifier `n` to the group

    If `name` is given, the repeated group as a whole is wrapped in a
    named group.

    >>> M('a', 'b')
    '(?:ab)*'
    >>> M('a', 'b', n='+')
    '(?:ab)+'
    >>> M('a', 'b', n='{2,3}', name='name')
    '(?P<name>(?:ab){2,3})'
    """
    repeated = P(*regexps, repeat=n)
    return NAMED(name, repeated) if name else repeated
# helper to make parenthesis optional around regexp
def OPTIONAL_PARS(R) -> str:
    """Build a regexp matching `R` either bare or wrapped in parentheses

    Whitespace is allowed just inside the parentheses.  `R` appears twice
    in the result (once per alternative).
    """
    # Group R before putting the literal parentheses around it: if R has a
    # top-level `|`, the `\(` and `\)` would otherwise attach only to its
    # first and last alternatives.
    inner = P(R)
    return OR(S(r'\(\s*', inner, r'\s*\)'), R)
def test_optional_pars():
    """Check OPTIONAL_PARS against balanced, unbalanced and wrong input"""
    pattern = OPTIONAL_PARS('abc') + '$'
    # bare, or with a complete pair of parentheses
    for accepted in ('abc', '(abc)'):
        assert re.match(pattern, accepted)
    # wrong content, or only one of the two parentheses
    for rejected in ('(abcd)', '(abc', 'abc)'):
        assert not re.match(pattern, rejected)
# this disables the MULTILINE flag, so it will match at the
# beginning of the file:
RE_FILE_BEGIN = r'(?-m:^)'

# C primitives:

# optional whitespace
SP = r'\s*'

# A single comment: `// ...` up to the end of the line, or `/* ... */`.
# - Both alternatives are grouped with OR() so that text concatenated after
#   RE_COMMENT applies to either form, not just to the last alternative.
# - The block-comment body consumes runs of `*` only when they are not
#   followed by `/`, so comments ending in `**/` (e.g. `/*****/` banners)
#   match and the pattern cannot run past the first `*/`.
# NOTE(review): `$` only matches at line ends when the pattern is compiled
# with re.MULTILINE -- presumably callers do so; confirm.
RE_COMMENT = OR(r'//[^\n]*$', r'/\*([^*]|\*+[^*/])*\*+/')
# zero or more comments, each optionally followed by whitespace
RE_COMMENTS = M(RE_COMMENT + SP)

# The lookahead keeps the match from stopping in the middle of a longer
# identifier when the regexp engine backtracks; it must therefore reject
# every identifier character, including `_`.
RE_IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_]*(?![a-zA-Z0-9_])'
RE_STRING = r'\"([^\"\\]|\\[a-z\"])*\"'
# Hexadecimal is tried first: otherwise the decimal alternative matches the
# leading `0` of `0x...` and an unanchored match stops there.
RE_NUMBER = OR(r'0x[0-9a-fA-F]+', r'[0-9]+')

# space or escaped newlines:
CPP_SPACE = OR(r'\s', r'\\\n', repeat='+')

RE_PATH = '[a-zA-Z0-9/_.-]+'
# path of an #include directive, delimited by either "..." or <...>:
RE_INCLUDEPATH = OR(S(r'\"', RE_PATH, r'\"'),
                    S(r'<', RE_PATH, r'>'))

# whole-line #include directive, including the trailing newline; the path
# (with its delimiters) is captured in the 'includepath' group:
RE_INCLUDE = S(r'^[ \t]*#[ \t]*include[ \t]+', NAMED('includepath', RE_INCLUDEPATH), r'[ \t]*\n')
# whole-line "#define NAME" with nothing after the name:
RE_SIMPLEDEFINE = S(r'^[ \t]*#[ \t]*define[ \t]+', RE_IDENTIFIER, r'[ \t]*\n')

# type name: either a plain identifier or "struct NAME"
RE_STRUCT_TYPE = S(r'struct\s+', RE_IDENTIFIER)
RE_TYPE = OR(RE_IDENTIFIER, RE_STRUCT_TYPE)

# two or more identifiers/string literals in a row, separated only by
# optional whitespace:
RE_MACRO_CONCAT = M(S(OR(RE_IDENTIFIER, RE_STRING), SP), n='{2,}')

RE_SIMPLE_VALUE = OR(RE_IDENTIFIER, RE_STRING, RE_NUMBER)

# function (or macro) call taking exactly one simple value as argument:
RE_FUN_CALL = S(RE_IDENTIFIER, r'\s*\(\s*', RE_SIMPLE_VALUE, r'\s*\)')
# sizeof(TYPE); the type is captured in the 'sizeoftype' group:
RE_SIZEOF = S(r'sizeof\s*\(\s*', NAMED('sizeoftype', RE_TYPE), r'\s*\)')

# address-of applied to an identifier:
RE_ADDRESS = S(r'&\s*', RE_IDENTIFIER)

# one "{ value }" array element (the value may be missing), with an
# optional trailing comma; the value is captured in 'arrayitem':
RE_ARRAY_ITEM = S(r'{\s*', NAMED('arrayitem', M(RE_SIMPLE_VALUE, n='?')), r'\s*}\s*,?')
# "(NAME[])" cast that may precede an array initializer:
RE_ARRAY_CAST = S(r'\(\s*', RE_IDENTIFIER, r'\s*\[\s*\]\)')
RE_ARRAY_ITEMS = M(S(RE_ARRAY_ITEM, SP))
# optional cast followed by "{ items }"; the items are captured in the
# 'arrayitems' group:
RE_ARRAY = S(M(RE_ARRAY_CAST, n='?'), r'\s*{\s*',
             NAMED('arrayitems', RE_ARRAY_ITEMS),
             r'}')

# NOTE: this covers a very small subset of valid expressions

RE_EXPRESSION = OR(RE_SIZEOF, RE_FUN_CALL, RE_MACRO_CONCAT, RE_SIMPLE_VALUE,
                   RE_ARRAY, RE_ADDRESS)