2020-10-23 02:42:51 +02:00
|
|
|
import re
|
|
|
|
|
|
|
|
from ._regexes import (
|
|
|
|
LOCAL as _LOCAL,
|
|
|
|
LOCAL_STATICS as _LOCAL_STATICS,
|
|
|
|
)
|
|
|
|
from ._common import (
|
|
|
|
log_match,
|
|
|
|
parse_var_decl,
|
|
|
|
set_capture_groups,
|
|
|
|
match_paren,
|
|
|
|
)
|
|
|
|
from ._compound_decl_body import DECL_BODY_PARSERS
|
|
|
|
|
|
|
|
|
|
|
|
LOCAL = set_capture_groups(_LOCAL, (
|
|
|
|
'EMPTY',
|
|
|
|
'INLINE_LEADING',
|
|
|
|
'INLINE_PRE',
|
|
|
|
'INLINE_KIND',
|
|
|
|
'INLINE_NAME',
|
|
|
|
'STORAGE',
|
|
|
|
'VAR_DECL',
|
|
|
|
'VAR_INIT',
|
|
|
|
'VAR_ENDING',
|
|
|
|
'COMPOUND_BARE',
|
|
|
|
'COMPOUND_LABELED',
|
|
|
|
'COMPOUND_PAREN',
|
|
|
|
'BLOCK_LEADING',
|
|
|
|
'BLOCK_OPEN',
|
|
|
|
'SIMPLE_STMT',
|
|
|
|
'SIMPLE_ENDING',
|
|
|
|
'BLOCK_CLOSE',
|
|
|
|
))
|
|
|
|
LOCAL_RE = re.compile(rf'^ \s* {LOCAL}', re.VERBOSE)
|
|
|
|
|
|
|
|
|
|
|
|
# Note that parse_function_body() still has trouble with a few files
|
|
|
|
# in the CPython codebase.
|
|
|
|
|
|
|
|
def parse_function_body(source, name, anon_name):
|
|
|
|
# XXX
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
|
|
def parse_function_body(name, text, resolve, source, anon_name, parent):
|
|
|
|
raise NotImplementedError
|
|
|
|
# For now we do not worry about locals declared in for loop "headers".
|
|
|
|
depth = 1;
|
|
|
|
while depth > 0:
|
|
|
|
m = LOCAL_RE.match(text)
|
|
|
|
while not m:
|
|
|
|
text, resolve = continue_text(source, text or '{', resolve)
|
|
|
|
m = LOCAL_RE.match(text)
|
|
|
|
text = text[m.end():]
|
|
|
|
(
|
|
|
|
empty,
|
|
|
|
inline_leading, inline_pre, inline_kind, inline_name,
|
|
|
|
storage, decl,
|
|
|
|
var_init, var_ending,
|
|
|
|
compound_bare, compound_labeled, compound_paren,
|
|
|
|
block_leading, block_open,
|
|
|
|
simple_stmt, simple_ending,
|
|
|
|
block_close,
|
|
|
|
) = m.groups()
|
|
|
|
|
|
|
|
if empty:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('', m, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
resolve(None, None, None, text)
|
|
|
|
yield None, text
|
|
|
|
elif inline_kind:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('', m, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
kind = inline_kind
|
|
|
|
name = inline_name or anon_name('inline-')
|
|
|
|
data = [] # members
|
|
|
|
# We must set the internal "text" from _iter_source() to the
|
|
|
|
# start of the inline compound body,
|
|
|
|
# Note that this is effectively like a forward reference that
|
|
|
|
# we do not emit.
|
|
|
|
resolve(kind, None, name, text, None)
|
|
|
|
_parse_body = DECL_BODY_PARSERS[kind]
|
|
|
|
before = []
|
|
|
|
ident = f'{kind} {name}'
|
|
|
|
for member, inline, text in _parse_body(text, resolve, source, anon_name, ident):
|
|
|
|
if member:
|
|
|
|
data.append(member)
|
|
|
|
if inline:
|
|
|
|
yield from inline
|
|
|
|
# un-inline the decl. Note that it might not actually be inline.
|
|
|
|
# We handle the case in the "maybe_inline_actual" branch.
|
|
|
|
text = f'{inline_leading or ""} {inline_pre or ""} {kind} {name} {text}'
|
|
|
|
# XXX Should "parent" really be None for inline type decls?
|
|
|
|
yield resolve(kind, data, name, text, None), text
|
|
|
|
elif block_close:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('', m, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
depth -= 1
|
|
|
|
resolve(None, None, None, text)
|
|
|
|
# XXX This isn't great. Calling resolve() should have
|
|
|
|
# cleared the closing bracket. However, some code relies
|
|
|
|
# on the yielded value instead of the resolved one. That
|
|
|
|
# needs to be fixed.
|
|
|
|
yield None, text
|
|
|
|
elif compound_bare:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('', m, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
yield resolve('statement', compound_bare, None, text, parent), text
|
|
|
|
elif compound_labeled:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('', m, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
yield resolve('statement', compound_labeled, None, text, parent), text
|
|
|
|
elif compound_paren:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('', m, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
try:
|
|
|
|
pos = match_paren(text)
|
|
|
|
except ValueError:
|
|
|
|
text = f'{compound_paren} {text}'
|
|
|
|
#resolve(None, None, None, text)
|
|
|
|
text, resolve = continue_text(source, text, resolve)
|
|
|
|
yield None, text
|
|
|
|
else:
|
|
|
|
head = text[:pos]
|
|
|
|
text = text[pos:]
|
|
|
|
if compound_paren == 'for':
|
|
|
|
# XXX Parse "head" as a compound statement.
|
|
|
|
stmt1, stmt2, stmt3 = head.split(';', 2)
|
|
|
|
data = {
|
|
|
|
'compound': compound_paren,
|
|
|
|
'statements': (stmt1, stmt2, stmt3),
|
|
|
|
}
|
|
|
|
else:
|
|
|
|
data = {
|
|
|
|
'compound': compound_paren,
|
|
|
|
'statement': head,
|
|
|
|
}
|
|
|
|
yield resolve('statement', data, None, text, parent), text
|
|
|
|
elif block_open:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('', m, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
depth += 1
|
|
|
|
if block_leading:
|
|
|
|
# An inline block: the last evaluated expression is used
|
|
|
|
# in place of the block.
|
|
|
|
# XXX Combine it with the remainder after the block close.
|
|
|
|
stmt = f'{block_open}{{<expr>}}...;'
|
|
|
|
yield resolve('statement', stmt, None, text, parent), text
|
|
|
|
else:
|
|
|
|
resolve(None, None, None, text)
|
|
|
|
yield None, text
|
|
|
|
elif simple_ending:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('', m, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
yield resolve('statement', simple_stmt, None, text, parent), text
|
|
|
|
elif var_ending:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('', m, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
kind = 'variable'
|
|
|
|
_, name, vartype = parse_var_decl(decl)
|
|
|
|
data = {
|
|
|
|
'storage': storage,
|
|
|
|
'vartype': vartype,
|
|
|
|
}
|
|
|
|
after = ()
|
|
|
|
if var_ending == ',':
|
|
|
|
# It was a multi-declaration, so queue up the next one.
|
|
|
|
_, qual, typespec, _ = vartype.values()
|
|
|
|
text = f'{storage or ""} {qual or ""} {typespec} {text}'
|
|
|
|
yield resolve(kind, data, name, text, parent), text
|
|
|
|
if var_init:
|
|
|
|
_data = f'{name} = {var_init.strip()}'
|
|
|
|
yield resolve('statement', _data, None, text, parent), text
|
|
|
|
else:
|
|
|
|
# This should be unreachable.
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
|
|
#############################
|
|
|
|
# static local variables
|
|
|
|
|
|
|
|
LOCAL_STATICS = set_capture_groups(_LOCAL_STATICS, (
|
|
|
|
'INLINE_LEADING',
|
|
|
|
'INLINE_PRE',
|
|
|
|
'INLINE_KIND',
|
|
|
|
'INLINE_NAME',
|
|
|
|
'STATIC_DECL',
|
|
|
|
'STATIC_INIT',
|
|
|
|
'STATIC_ENDING',
|
|
|
|
'DELIM_LEADING',
|
|
|
|
'BLOCK_OPEN',
|
|
|
|
'BLOCK_CLOSE',
|
|
|
|
'STMT_END',
|
|
|
|
))
|
|
|
|
LOCAL_STATICS_RE = re.compile(rf'^ \s* {LOCAL_STATICS}', re.VERBOSE)
|
|
|
|
|
|
|
|
|
|
|
|
def parse_function_statics(source, func, anon_name):
|
|
|
|
# For now we do not worry about locals declared in for loop "headers".
|
|
|
|
depth = 1;
|
|
|
|
while depth > 0:
|
|
|
|
for srcinfo in source:
|
|
|
|
m = LOCAL_STATICS_RE.match(srcinfo.text)
|
|
|
|
if m:
|
|
|
|
break
|
|
|
|
else:
|
|
|
|
# We ran out of lines.
|
|
|
|
if srcinfo is not None:
|
|
|
|
srcinfo.done()
|
|
|
|
return
|
|
|
|
for item, depth in _parse_next_local_static(m, srcinfo,
|
|
|
|
anon_name, func, depth):
|
|
|
|
if callable(item):
|
|
|
|
parse_body = item
|
|
|
|
yield from parse_body(source)
|
|
|
|
elif item is not None:
|
|
|
|
yield item
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_next_local_static(m, srcinfo, anon_name, func, depth):
|
|
|
|
(inline_leading, inline_pre, inline_kind, inline_name,
|
|
|
|
static_decl, static_init, static_ending,
|
|
|
|
_delim_leading,
|
|
|
|
block_open,
|
|
|
|
block_close,
|
|
|
|
stmt_end,
|
|
|
|
) = m.groups()
|
|
|
|
remainder = srcinfo.text[m.end():]
|
|
|
|
|
|
|
|
if inline_kind:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('func inline', m, depth, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
kind = inline_kind
|
|
|
|
name = inline_name or anon_name('inline-')
|
|
|
|
# Immediately emit a forward declaration.
|
|
|
|
yield srcinfo.resolve(kind, name=name, data=None), depth
|
|
|
|
|
|
|
|
# un-inline the decl. Note that it might not actually be inline.
|
|
|
|
# We handle the case in the "maybe_inline_actual" branch.
|
|
|
|
srcinfo.nest(
|
|
|
|
remainder,
|
|
|
|
f'{inline_leading or ""} {inline_pre or ""} {kind} {name}'
|
|
|
|
)
|
|
|
|
def parse_body(source):
|
|
|
|
_parse_body = DECL_BODY_PARSERS[kind]
|
|
|
|
|
|
|
|
data = [] # members
|
|
|
|
ident = f'{kind} {name}'
|
|
|
|
for item in _parse_body(source, anon_name, ident):
|
|
|
|
if item.kind == 'field':
|
|
|
|
data.append(item)
|
|
|
|
else:
|
|
|
|
yield item
|
|
|
|
# XXX Should "parent" really be None for inline type decls?
|
|
|
|
yield srcinfo.resolve(kind, data, name, parent=None)
|
|
|
|
|
|
|
|
srcinfo.resume()
|
|
|
|
yield parse_body, depth
|
|
|
|
|
|
|
|
elif static_decl:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('local variable', m, depth, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
_, name, data = parse_var_decl(static_decl)
|
|
|
|
|
|
|
|
yield srcinfo.resolve('variable', data, name, parent=func), depth
|
|
|
|
|
|
|
|
if static_init:
|
|
|
|
srcinfo.advance(f'{name} {static_init} {remainder}')
|
|
|
|
elif static_ending == ',':
|
|
|
|
# It was a multi-declaration, so queue up the next one.
|
|
|
|
_, qual, typespec, _ = data.values()
|
|
|
|
srcinfo.advance(f'static {qual or ""} {typespec} {remainder}')
|
|
|
|
else:
|
|
|
|
srcinfo.advance('')
|
|
|
|
|
|
|
|
else:
|
|
|
|
log_match('func other', m)
|
|
|
|
if block_open:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('func other', None, depth, depth + 1)
|
2020-10-23 02:42:51 +02:00
|
|
|
depth += 1
|
|
|
|
elif block_close:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('func other', None, depth, depth - 1)
|
2020-10-23 02:42:51 +02:00
|
|
|
depth -= 1
|
|
|
|
elif stmt_end:
|
2023-03-07 03:40:09 +01:00
|
|
|
log_match('func other', None, depth, depth)
|
2020-10-23 02:42:51 +02:00
|
|
|
pass
|
|
|
|
else:
|
|
|
|
# This should be unreachable.
|
|
|
|
raise NotImplementedError
|
|
|
|
srcinfo.advance(remainder)
|
|
|
|
yield None, depth
|