from dataclasses import dataclass import imp from webbrowser import Opera from .defs import Token, IntegerLiteralToken, TokenType, OperatorToken, KeywordToken, IdentifierToken, Span from .lexer import Lexer from .helpers import ParserIterator from .errors import CompilerError, EndOfInputError, InvalidTokenError, UnsupportedSyntaxError, print_warning from typing import Tuple, Optional, List, Dict, Set, Iterable @dataclass(frozen=True) class Type: name: IdentifierToken | str wraps: Tuple['Type', ...] @dataclass(frozen=True) class Value: type: Optional[Type] value: 'ASTNode' @dataclass(frozen=True) class FunctionArgument: name: IdentifierToken type: Type #default_value: Value | None @dataclass(frozen=True) class ASTNode: pass @dataclass(frozen=True) class FunctionNode(ASTNode): name: IdentifierToken args: Tuple[FunctionArgument, ...] return_type: Type contents: Tuple[ASTNode, ...] @dataclass(frozen=True) class FunctionCallNode(ASTNode): function: Value arguments: List[Value] @dataclass(frozen=True) class VariableDeclarationNode(ASTNode): name: IdentifierToken modifiers: List[str] type: Type value: Value @dataclass(frozen=True) class ForLoopNode(ASTNode): variable_name: IdentifierToken | str iterator: Value body: List[ASTNode] @dataclass(frozen=True) class SpreadOperatorNode(ASTNode): left_side: Value right_side: Value type: Type | None @dataclass(frozen=True) class IntegerImmediateNode(ASTNode): value: int type: Type span: Span @dataclass(frozen=True) class StringImmediateNode(ASTNode): value: str type: Type span: Span @dataclass(frozen=True) class VariableNameNode(ASTNode): name: IdentifierToken @dataclass(frozen=True) class BracketetExpressionNode(ASTNode): content: ASTNode @dataclass(frozen=True) class UseStatement(ASTNode): path: List[IdentifierToken] class Parser: """ This class takes a lexed input and produces a syntax tree. This only validates syntax, but does no type checking etc... """ types: Dict[str, Type] lexer: Lexer tokens: ParserIterator[Token] def __init__(self, lexer: Lexer): self.variables = dict() self.lexer = lexer # strip comments from tokens self.tokens = ParserIterator(lexer.do_parse()) def parse(self): body = [] while True: thing = self.parse_file_level_block() if thing is None: return body body.append(thing) print(thing) def consume_next_token(self, types:Set[TokenType]|TokenType = None, content: str = None, msg: str = None): if not isinstance(types, set) and types is not None: types = {types} peeked = self.tokens.peek() if peeked is None: raise EndOfInputError(self.tokens.last_item.span, content) if types is not None and peeked.kind not in types: raise InvalidTokenError(peeked, (*types, content), msg) if content is not None and peeked.content != content: raise InvalidTokenError(peeked, {content}, msg) return self.tokens.next() def consume_optional_eol(self): """ This function tries to consume EOL tokens, if they are available """ while self.tokens.peek().kind == TokenType.EOL: self.tokens.next() def consume_expected_eol(self, msg): """ This function consumes at least one EOL token, or fails """ if self.tokens.peek().kind != TokenType.EOL: raise InvalidTokenError(self.tokens.peek(), expected_type=["\\n"], message=msg) while self.tokens.peek().kind == TokenType.EOL: self.tokens.next() def consume_optional(self, types:Set[TokenType]|TokenType = None, content: str = None, msg: str = None): try: return self.consume_next_token(types, content, msg) except InvalidTokenError: return False def parse_file_level_block(self) -> ASTNode | None: """ File-level blocks are statements written at the file level. """ # this part ignores newlines! prev_ignore_lvl = self.tokens.ignore_newline self.tokens.ignore_newline = True try: match self.tokens.peek(): case KeywordToken(content="function"): return self.parse_function_definition() case KeywordToken(content="struct"): return self.parse_struct() case KeywordToken(content="const"): return self.parse_const_declaration() case KeywordToken(content="use"): return self.parse_import_statement() case Token(kind=TokenType.EOI): return None case None: raise Exception("Unexpected None token!") case unknown_token: raise InvalidTokenError(unknown_token, ("function", "struct"), "Only function and struct declarations are allowed at file-level!") finally: self.tokens.ignore_newline = prev_ignore_lvl def parse_import_statement(self): """ parse an import-equivalent statement: use std.String """ self.consume_next_token(types=TokenType.Keyword, content="use") path = [] if self.tokens.peek().kind == TokenType.String: raise UnsupportedSyntaxError(self.tokens.peek(), "file paths in use statements!") prev = self.tokens.ignore_newline self.tokens.ignore_newline = False while self.tokens.peek().kind != TokenType.EOL: path.append(self.consume_next_token(TokenType.Identifier)) if self.tokens.peek().content == '.': self.consume_next_token(types=TokenType.Operator, content='.') self.consume_expected_eol("'use' statement must be terminated by EOL!") self.tokens.ignore_newline = prev return UseStatement(path) def parse_basic_block(self) -> Iterable[ASTNode]: """ A "Basic Block" is a block inside a function, for loop, etc. """ # when parsing blocks, newlines are important! prev_ignore_lvl = self.tokens.ignore_newline self.tokens.ignore_newline = False if prev_ignore_lvl: # consume all remaining EOLs self.consume_optional_eol() try: while True: match self.tokens.peek(): case KeywordToken(content="function"): yield self.parse_function_definition() case KeywordToken(content="const"): yield self.parse_const_declaration() case KeywordToken(content="let"): raise UnsupportedSyntaxError(self.tokens.peek(), "'let' not supported yet") case KeywordToken(content="for"): yield self.parse_for_statement() case KeywordToken(content="return"): raise UnsupportedSyntaxError(self.tokens.peek(), "'return' not supported yet") case KeywordToken(content="if"): raise UnsupportedSyntaxError(self.tokens.peek(), "'if' not supported yet") case KeywordToken(content="struct"): # TODO: support raise UnsupportedSyntaxError(self.tokens.peek(), "structs not supported yet") case Token(kind=TokenType.RBracket, content="}"): break case other: yield self.parse_value() self.consume_expected_eol(msg="Only one statement per line permitted!") finally: self.tokens.ignore_newline = prev_ignore_lvl def parse_function_definition(self): """ Parses a function definition including the body """ self.tokens.next() function_name = self.consume_next_token(types=TokenType.Identifier, msg="'function' keyword must be followed by identifier!") # consume parenthesis self.consume_next_token(types = TokenType.LBracket, content="(", msg="A function declaration must contain a list of arguments enclosed in parenthesis!") args = [] # TODO: we actually want to match against Token(kind=TokenType.RParen, content=")") while self.tokens.peek().content != ')': args.append(self.parse_function_def_arg()) self.consume_optional_eol() if not self.consume_optional(content=','): break self.consume_optional_eol() self.consume_next_token(types=TokenType.RBracket, content=")", msg="Expected ')' at the end of function argument list!") if self.tokens.peek().content == '->': raise UnsupportedSyntaxError(self.tokens.peek(), "Function return type annotations are not yet supported!") if self.tokens.peek().content == '=>': raise UnsupportedSyntaxError(self.tokens.peek(), "Short function body notation not yet supported!") self.consume_next_token(types=TokenType.LBracket, content="{") content = list(self.parse_basic_block()) self.consume_next_token(types=TokenType.RBracket, content="}", msg="Expected '}' at the end of a function body!") return FunctionNode(function_name, args, None, content) def parse_function_def_arg(self) -> FunctionArgument: """ Parse a single argument of a function. Currently this allows name: type In the future we want to also support name: type = value """ identifier = self.consume_next_token(types=TokenType.Identifier, msg="Function argument name expected!") self.consume_next_token(types=TokenType.Operator, content=":", msg="Function argument name must be followed by a colon ':' and a type definition!") type = self.parse_type() if self.tokens.peek().content == '=': raise UnsupportedSyntaxError(self.tokens.peek(), "default values for function arguments") return FunctionArgument(identifier, type) def parse_type(self) -> Type: """ Parse a type declaration, such as String, i64, or Vector """ main_type = self.consume_next_token(types=TokenType.Identifier, msg="Expected type name!") # if this type does not wrap any other types, we are done! if self.tokens.peek().content != '<': return Type(main_type, []) wrapped_types = [] start_token = self.consume_next_token(content="<") while self.tokens.peek().content != '>': wrapped_types.append(self.parse_type()) if not self.consume_optional(content=","): break self.consume_optional_eol() self.consume_next_token(content=">", msg="Error while parsing list of wrapped types, expected '>' at the end of the type list!") if len(wrapped_types) == 0: print_warning(self.tokens.last_item.span.union(start_token.span), "Empty set of type arguments!") return Type(main_type, wrapped_types) def parse_const_declaration(self): """ parse a const declaration, so basically const name: type = value """ self.consume_next_token(types=TokenType.Keyword, content="const") identifier = self.consume_next_token(types=TokenType.Identifier, msg="const keywords must be immediately followed by a variable name!") type = None if self.tokens.peek().content == ':': self.consume_next_token(content=':') type = self.parse_type() self.consume_next_token(content='=', msg="Expected '=' in const declaration!") value = self.parse_value() self.consume_expected_eol("Const declaration statement must be terminated by a newline!") return VariableDeclarationNode(identifier, ['const'], type, Value(None, value)) def parse_value(self) -> ASTNode: """ This function parses a "value", so basically any statement that evaluates to a value This can be a literal, a function call, an array/struct constructor, etc. """ # handle bracketet expression if self.tokens.peek().content == '(': self.consume_next_token(content='(') self.consume_optional_eol() value = self.parse_value() self.consume_optional_eol() self.consume_next_token(content=')', msg="Expected closing bracket") value = self._inner_parse_value() match self.tokens.peek(): case OperatorToken(content='..'): self.consume_next_token(types=TokenType.Operator, content="..") right_hand_type = self.parse_type() return SpreadOperatorNode(value, right_hand_type, None) #case OperatorToken(content): # raise UnsupportedSyntaxError(self.tokens.peek(), f"'{content}' is not implemented yet!") case Token(kind=TokenType.LBracket, content="("): self.consume_next_token(content="(") self.consume_optional_eol() args = list(self.parse_inner_function_call_args()) self.consume_next_token(content=')', msg="") return FunctionCallNode(value, args) def _inner_parse_value(self) -> ASTNode: match self.tokens.peek(): case IntegerLiteralToken(value, suffix, span): if suffix: type = Type(suffix, []) else: # assume widest signed integer type available type = Type('i64', []) self.consume_next_token() return IntegerImmediateNode(value, type, span) case Token(span, content, kind=TokenType.String): self.consume_next_token() return StringImmediateNode(content, Type("String", []), span) case Token(content="{", kind=TokenType.LBracket): return self.parse_structured_value() case IdentifierToken(): return VariableNameNode(self.consume_next_token(TokenType.Identifier)) case other: raise UnsupportedSyntaxError(other, "This type of value is not implemented yet!") def parse_inner_function_call_args(self) -> Iterable[ASTNode]: if self.tokens.peek().content == ')': return while True: self.consume_optional_eol() yield self.parse_value() self.consume_optional_eol() if self.tokens.peek().content == ',': self.consume_next_token(content=",") continue if self.tokens.peek().content == ')': break def parse_structured_value(self) -> ASTNode: """ parse either a list or struct initializer: list initializer: const data: Vector = {1,2,3,4,5} struct initializer: const data: MyStruct = { field1: 1 field2: "Hello World" arrayField: {"test", "123", "these are strings"} } """ raise UnsupportedSyntaxError(self.tokens.peek(), "Structured values such as lists, dictionaries and structs!") def parse_for_statement(self) -> ForLoopNode: self.consume_next_token(content='for', types=TokenType.Keyword) loop_variable_name = self.consume_next_token(types=TokenType.Identifier, msg="Name of the loop variable expected!") self.consume_next_token(types=TokenType.Keyword, content="in", msg="for in format required!") iterator = self.parse_value() self.consume_next_token(content='{') self.consume_optional_eol() body = list(self.parse_basic_block()) self.consume_optional_eol() self.consume_next_token(content='}') self.consume_optional_eol() return ForLoopNode(loop_variable_name, iterator, body)