from . import ast
from . import lexer
from typing import TextIO
import ast as pyast
__all__ = ['ExpectedError', 'Parser']
class ExpectedError(Exception):
    """Raised when the parser finds a token other than the one it expected.

    Attributes:
        pos: position of the offending token in the source.
        expected: human-readable description of what was expected.
        got: literal text of the token actually found ('' at EOF).
    """

    def __init__(self, pos: ast.Position, expected: str, got: str):
        # BUG FIX: the conditional previously lived inside the f-string and
        # reused the enclosing single quotes (`got == ''`), which is a
        # SyntaxError on every Python before 3.12 (PEP 701). Hoisting it out
        # keeps the message identical and the file importable on 3.10/3.11.
        shown = 'EOF' if got == '' else repr(got)
        super().__init__(f'{pos}: expected {expected}; got {shown}')
        self.pos = pos
        self.got = got
        self.expected = expected
class Parser:
|
|
_prev: tuple[lexer.Token, ast.Position, str] | None = None
|
|
|
|
def __init__(self, stream: TextIO, name: str):
|
|
self.lexer = lexer.Lexer(stream, name)
|
|
|
|
def _scan(self) -> tuple[lexer.Token, ast.Position, str]:
|
|
if self._prev is not None:
|
|
prev = self._prev
|
|
self._prev = None
|
|
return prev
|
|
return self.lexer.scan()
|
|
|
|
def _unscan(self, tok: lexer.Token, pos: ast.Position, lit: str):
|
|
self._prev = tok, pos, lit
|
|
|
|
def _parse_index(self, val: ast.Value) -> ast.Index:
|
|
index = ast.Index(pos=val.pos, value=val, index=self._parse_expr())
|
|
tok, pos, lit = self._scan()
|
|
if tok != lexer.Token.SQUARE or lit != ']':
|
|
raise ExpectedError(pos, 'closing square bracket', lit)
|
|
return index
|
|
|
|
def _parse_expr(self) -> ast.Value:
|
|
left = self._parse_value()
|
|
tok, pos, lit = self._scan()
|
|
while tok == lexer.Token.SQUARE and lit == '[':
|
|
left = self._parse_index(left)
|
|
# Scan the next token for the next if statement
|
|
tok, pos, lit = self._scan()
|
|
if tok != lexer.Token.OPERATOR:
|
|
self._unscan(tok, pos, lit)
|
|
return left
|
|
right = self._parse_expr()
|
|
return ast.BinaryExpression(pos=left.pos, op=ast.Operator(pos=pos, value=lit), left=left, right=right)
|
|
|
|
def _parse_tuple(self, start_pos: ast.Position) -> ast.Tuple:
|
|
items: list[ast.Value] = []
|
|
while True:
|
|
tok, pos, lit = self._scan()
|
|
if tok == lexer.Token.SQUARE and lit == ']':
|
|
break
|
|
self._unscan(tok, pos, lit)
|
|
items.append(self._parse_expr())
|
|
|
|
tok, pos, lit = self._scan()
|
|
if tok != lexer.Token.COMMA and (tok != lexer.Token.SQUARE or lit != ']'):
|
|
raise ExpectedError(pos, 'comma or closing square bracket', lit)
|
|
elif tok == lexer.Token.SQUARE and lit == ']':
|
|
break
|
|
return ast.Tuple(start_pos, items)
|
|
|
|
def _parse_object(self, start_pos: ast.Position) -> ast.Object:
|
|
items: list[tuple[ast.Value, ast.Value]] = []
|
|
while True:
|
|
tok, pos, lit = self._scan()
|
|
if tok == lexer.Token.CURLY and lit == '}':
|
|
break
|
|
self._unscan(tok, pos, lit)
|
|
key = self._parse_expr()
|
|
|
|
tok, pos, lit = self._scan()
|
|
if tok != lexer.Token.COLON and (tok != lexer.Token.OPERATOR or lit != '='):
|
|
raise ExpectedError(pos, 'colon or equals sign', lit)
|
|
|
|
val = self._parse_expr()
|
|
items.append((key, val))
|
|
|
|
tok, pos, lit = self._scan()
|
|
if tok != lexer.Token.COMMA:
|
|
self._unscan(tok, pos, lit)
|
|
|
|
return ast.Object(start_pos, items)
|
|
|
|
def _parse_func_call(self) -> ast.FunctionCall:
|
|
id_tok, id_pos, id_lit = self._scan()
|
|
tok, pos, lit = self._scan()
|
|
if tok != lexer.Token.PAREN or lit != '(':
|
|
raise ExpectedError(pos, 'opening parentheses', lit)
|
|
|
|
tok, pos, lit = self._scan()
|
|
if tok == lexer.Token.PAREN and lit == ')':
|
|
return ast.FunctionCall(pos=id_pos, name=id_lit, args=[])
|
|
self._unscan(tok, pos, lit)
|
|
|
|
args: list[ast.Value] = []
|
|
while True:
|
|
args.append(self._parse_expr())
|
|
tok, pos, lit = self._scan()
|
|
if tok == lexer.Token.PAREN and lit == ')':
|
|
break
|
|
elif tok == lexer.Token.COMMA:
|
|
continue
|
|
elif tok == lexer.Token.ELLIPSIS:
|
|
args[-1] = ast.Expansion(pos=args[-1].pos, value=args[-1])
|
|
tok, pos, lit = self._scan()
|
|
if tok != lexer.Token.PAREN or lit != ')':
|
|
raise ExpectedError(pos, 'closing parentheses', lit)
|
|
break
|
|
else:
|
|
raise ExpectedError(pos, 'comma or closing parentheses', lit)
|
|
return ast.FunctionCall(pos=id_pos, name=id_lit, args=args)
|
|
|
|
def _parse_value(self) -> ast.Value:
|
|
tok, pos, lit = self._scan()
|
|
match tok:
|
|
case lexer.Token.INTEGER:
|
|
return ast.Integer(pos=pos, value=int(lit))
|
|
case lexer.Token.FLOAT:
|
|
return ast.Float(pos=pos, value=float(lit))
|
|
case lexer.Token.BOOL:
|
|
return ast.Bool(pos=pos, value=(lit == 'true'))
|
|
case lexer.Token.STRING:
|
|
return ast.String(pos=pos, value=pyast.literal_eval(lit))
|
|
case lexer.Token.IDENT:
|
|
if self.lexer._peek(1) == '(':
|
|
self._unscan(tok, pos, lit)
|
|
return self._parse_func_call()
|
|
return ast.VariableRef(pos=pos, name=lit)
|
|
case lexer.Token.HEREDOC:
|
|
return ast.String(pos=pos, value=lit)
|
|
case lexer.Token.OPERATOR:
|
|
return ast.UnaryExpression(pos=pos, op=ast.Operator(pos=pos, value=lit), value=self._parse_value())
|
|
case lexer.Token.SQUARE:
|
|
if lit != '[':
|
|
raise ExpectedError(pos, repr('['), lit)
|
|
return self._parse_tuple(pos)
|
|
case lexer.Token.CURLY:
|
|
if lit != '{':
|
|
raise ExpectedError(pos, repr('{'), lit)
|
|
return self._parse_object(pos)
|
|
case lexer.Token.PAREN:
|
|
if lit != '(':
|
|
raise ExpectedError(pos, repr('('), lit)
|
|
expr = self._parse_expr()
|
|
tok, pos, lit = self._scan()
|
|
if tok != lexer.Token.PAREN or lit != ')':
|
|
raise ExpectedError(pos, repr(')'), lit)
|
|
return expr
|
|
|
|
raise ExpectedError(pos, 'value', lit)
|
|
|
|
def parse(self, until: tuple[lexer.Token, str] = (lexer.Token.EOF, '')) -> ast.AST:
|
|
tree = []
|
|
while True:
|
|
id_tok, id_pos, id_lit = self._scan()
|
|
if id_tok == until[0] and id_lit == until[1]:
|
|
break
|
|
|
|
if id_tok != lexer.Token.IDENT:
|
|
raise ExpectedError(id_pos, str(lexer.Token.IDENT), id_lit)
|
|
|
|
tok, pos, lit = self._scan()
|
|
if tok == lexer.Token.OPERATOR and lit == '=':
|
|
tree.append(ast.Assignment(pos=id_pos, name=id_lit, value=self._parse_expr()))
|
|
elif tok == lexer.Token.CURLY and lit == '{':
|
|
tree.append(ast.Block(pos=id_pos, name=id_lit, labels=[], children=self.parse(until=(lexer.Token.CURLY, '}'))))
|
|
elif tok in (lexer.Token.STRING, lexer.Token.IDENT):
|
|
labels = []
|
|
while tok in (lexer.Token.STRING, lexer.Token.IDENT):
|
|
if tok == lexer.Token.IDENT:
|
|
labels.append(lit)
|
|
else:
|
|
self._unscan(tok, pos, lit)
|
|
val = self._parse_value()
|
|
assert isinstance(val, ast.String)
|
|
labels.append(val.value)
|
|
tok, pos, lit = self._scan()
|
|
if tok != lexer.Token.CURLY and lit != '{':
|
|
raise ExpectedError(pos, repr('{'), lit)
|
|
tree.append(ast.Block(pos=id_pos, name=id_lit, labels=labels, children=self.parse(until=(lexer.Token.CURLY, '}'))))
|
|
else:
|
|
raise ExpectedError(pos, "equals sign, opening curly brace, or string", lit)
|
|
return tree |