parser is working with very basic functionality

branch: master
Anton Lydike, 2 years ago
commit 525ef8f467

.gitignore

@@ -0,0 +1,2 @@
venv
__pycache__

@@ -0,0 +1,213 @@
from dataclasses import dataclass, fields
from typing import Dict, List, Iterable, Generator, Any, Literal
from enum import Enum, auto


@dataclass
class LexingContext:
    sources: Dict[str, str]
    entrypoint: str
    structs: Dict[int, Any]    # TODO: struct def type
    functions: Dict[str, Any]  # TODO: function types

    def get_nth_line_bounds(self, source_name: str, n: int):
        if source_name not in self.sources:
            raise KeyError("Unknown source file \"{}\"!".format(source_name))
        start = 0
        source = self.sources[source_name]
        for i in range(n):
            next_start = source.find('\n', start)
            if next_start == -1:
                return None
            start = next_start + 1
        return start, source.find('\n', start)

    def get_lines_containing(self, span: 'Span'):
        if span.source_name not in self.sources:
            raise KeyError("Unknown source file \"{}\"!".format(span.source_name))
        start = 0
        line_no = 0
        source = self.sources[span.source_name]
        while True:
            next_start = source.find('\n', start)
            line_no += 1
            # handle eof
            if next_start == -1:
                return None
            # as long as the next newline comes before the span's start we are good
            if next_start < span.start:
                start = next_start + 1
                continue
            # if the whole span is on one line, we are good as well
            if next_start >= span.end:
                return [source[start:next_start]], start, line_no
            # otherwise scan forward to the first newline past the span's end
            while next_start < span.end:
                next_start = source.find('\n', next_start + 1)
            return source[start:next_start].split('\n'), start, line_no


@dataclass(frozen=True)
class Span:
    start: int
    """
    Start of the token's location in the source file, global byte offset in file
    """
    end: int
    """
    End of the token's location in the source file, global byte offset in file
    """
    source_name: str
    context: LexingContext

    def union(self, *spans: 'Span'):
        for span in spans:
            assert span.source_name == self.source_name
            assert span.context == self.context
        return Span(
            start=min(self.start, *(span.start for span in spans)),
            end=max(self.end, *(span.end for span in spans)),
            source_name=self.source_name,
            context=self.context
        )

    def transform(self, start: int = 0, end: int = 0):
        return Span(self.start + start, self.end - end, self.source_name, self.context)

    def __repr__(self):
        return "{}(start={},end={},source_name={})".format(
            self.__class__.__name__,
            self.start, self.end, self.source_name
        )


class TokenType(Enum):
    Keyword = auto()
    Integer = auto()
    Float = auto()
    Identifier = auto()
    String = auto()
    LBracket = auto()
    RBracket = auto()
    Operator = auto()
    LineComment = auto()
    MultiComment = auto()
    EOL = auto()  # End of Line
    EOI = auto()  # End of Input


@dataclass(frozen=True)
class Token:
    span: Span
    content: str
    kind: TokenType

    def __new__(cls, span: Span, content: str, kind: TokenType):
        # dispatch to the registered subclass for this token kind, if any
        if kind in token_type_to_subclass_map and cls == Token:
            return token_type_to_subclass_map[kind].__new__(
                token_type_to_subclass_map[kind], span, content, kind
            )
        return super().__new__(cls)

    def __repr__(self):
        fields_to_print = [field for field in fields(self) if field.name not in ('span', 'context', 'kind')]
        if self.__class__ == Token:
            return "{}[{}]({})".format(
                self.__class__.__name__,
                self.kind.name,
                ", ".join("{}={}".format(field.name, repr(getattr(self, field.name))) for field in fields_to_print)
            )
        else:
            return "{}({})".format(
                self.__class__.__name__,
                ", ".join("{}={}".format(field.name, repr(getattr(self, field.name))) for field in fields_to_print)
            )


@dataclass(frozen=True, init=False, repr=False)
class IntegerLiteralToken(Token):
    value: int
    format: Literal['hex', 'bin', 'dec', 'oct']
    suffix: str | None

    def __init__(self, span: Span, content: str, kind: TokenType):
        super().__init__(span, content, kind)
        assert kind == TokenType.Integer
        suffix = None
        for suffix_ in integer_type_suffixes:
            if content.endswith(suffix_):
                suffix = suffix_
                content = content.removesuffix(suffix_)
                break
        format = 'dec'
        if content.startswith('0x') or content.startswith('0X'):
            value = int(content, 16)
            format = 'hex'
        elif content.startswith('0b'):
            value = int(content, 2)
            format = 'bin'
        else:
            value = int(content, 10)
        object.__setattr__(self, "value", value)
        object.__setattr__(self, "suffix", suffix)
        object.__setattr__(self, "format", format)


@dataclass(frozen=True, repr=False)
class KeywordToken(Token):
    pass


@dataclass(frozen=True, repr=False)
class OperatorToken(Token):
    pass


@dataclass(frozen=True, repr=False)
class IdentifierToken(Token):
    pass


token_type_to_subclass_map = {
    TokenType.Integer: IntegerLiteralToken,
    TokenType.Keyword: KeywordToken,
    TokenType.Operator: OperatorToken,
    TokenType.Identifier: IdentifierToken
}

keywords = {
    'const', 'let', 'for', 'if', 'function',
    'true', 'false', 'in', 'not', 'or', 'and',
    'struct', 'private', 'public', 'return',
    'impure', 'while', 'use', 'do', 'continue',
    'break'
}

digits = {
    'bin': '01_',
    'hex': '0123456789abcdefABCDEF_',
    'dec': '0123456789_',
    'oct': '01234567_'  # TODO: implement octal literals?
}

operators = {
    '+', '-', '*', '/', '!', '.', ',', '<', '>', ':', '<<', '>>', '&&', '||',
    '??', '%', '==', '!=', '<=', '>=', '..', '=>', '++', '--',
    '=', '*=', '+=', '/=', '-='
}

integer_type_suffixes = {
    'i8', 'i16', 'i32', 'i64',
    'u8', 'u16', 'u32', 'u64',
}

reserved_special_chars = {
    '#', '~', '`', '"', '\'', '@', '|', ';'
}

parens = '[]{}()<>'

identifier_terminating_chars = set((*operators, *parens, ' ', '\n', '\t', '\r', *reserved_special_chars))
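
A minimal usage sketch of the span machinery above (not part of the commit; assumes these definitions are importable, e.g. as compiler.defs):

# Sketch: Span.union merges byte ranges, LexingContext maps them back to lines.
ctx = LexingContext(sources={'demo': 'let x = 1\nlet y = 2\n'},
                    entrypoint='demo', structs={}, functions={})
a = Span(4, 5, 'demo', ctx)              # covers the "x"
b = Span(8, 9, 'demo', ctx)              # covers the "1"
merged = a.union(b)                      # Span(start=4,end=9,source_name=demo)
print(ctx.get_lines_containing(merged))  # -> (['let x = 1'], 0, 1)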

@@ -0,0 +1,90 @@
from .defs import Span, LexingContext, Token, TokenType
from math import log10, ceil
from typing import Iterable


def create_span_context_str(span: Span, message: str, color: str = '\033[31m'):
    lines, offset_into_file, line_no = span.context.get_lines_containing(span)
    relative_offset = span.start - offset_into_file
    annotation_len = span.end - span.start
    digit_len = ceil(log10(line_no + len(lines)))
    if digit_len == 0:
        digit_len = 1
    output_str = ">>> In file {}:{}\n".format(span.source_name, line_no)
    for i, source_line in enumerate(lines):
        source_line = source_line[:relative_offset] + color + source_line[relative_offset:relative_offset + annotation_len] + '\033[0m' + source_line[relative_offset + annotation_len:]
        output_str += '{:>{}d}: {}\n'.format(line_no + i, digit_len, source_line)
        if relative_offset > len(source_line):
            continue
        # TODO: handle multi-line underlines
        output_str += "{}{}{}{}\n".format(
            color,
            ' ' * (relative_offset + digit_len + 2),
            '^' * min(annotation_len, len(source_line) - relative_offset),
            '\033[0m'
        )
        if annotation_len > len(source_line) - relative_offset:
            # consume the part of the annotation printed on this line, then
            # continue underlining from column zero on the next line
            annotation_len -= len(source_line) - relative_offset
            relative_offset = 0
    if message:
        output_str += color
        output_str += ' ' * (relative_offset + digit_len + 2) + '|\n'
        for message_line in message.split("\n"):
            output_str += ' ' * (relative_offset + digit_len + 2) + message_line + '\n'
    return output_str + '\033[0m'


def print_warning(span: Span, message: str, color="\033[33m"):
    print(create_span_context_str(span, "Warning: " + message, color))


class CompilerError(Exception):
    span: Span
    message: str

    def __init__(self, msg: str, span: Span = None) -> None:
        super().__init__((msg, span))
        self.span = span
        self.message = msg

    def print_context_message(self):
        if not self.span:
            print("\n".join(">>> {}".format(line) for line in self.message.split('\n')))
        else:
            print(create_span_context_str(self.span, self.message))


class EndOfInputError(CompilerError):
    def __init__(self, span: Span, search_str: str = None) -> None:
        if search_str:
            super().__init__(f"Unexpected end-of-input in {span.source_name} while scanning for {search_str}!", span)
        else:
            super().__init__(f"Unexpected end-of-input in {span.source_name}!", span)


class ParseError(CompilerError):
    def __init__(self, msg: str, span: Span = None) -> None:
        super().__init__(msg, span)


class InvalidTokenError(CompilerError):
    def __init__(self, token: Token, expected_type: Iterable[str | TokenType] = None, message: str = None) -> None:
        expected = ", expected {}".format(", ".join(f"{x}" for x in expected_type)) if expected_type else ""
        super().__init__("Unexpected token {}{} {}".format(
            token, expected, '\n' + message if message else ""
        ), token.span if token is not None else None)


class UnsupportedSyntaxError(CompilerError):
    def __init__(self, token: Token, feature: str) -> None:
        super().__init__("Unsupported syntax: {}".format(feature), token.span)

@@ -0,0 +1,74 @@
from typing import TypeVar, Generic, Iterable, Iterator, List
from .defs import Token, TokenType

T = TypeVar("T")


class PeekableIterator(Iterator[T]):
    _peeked: List[T]
    last_item: T | None

    def __init__(self, iterable: Iterable[T]) -> None:
        self.iter = iter(iterable)
        self._peeked = list()
        self.last_item = None

    def peek(self, offset: int = 0):
        while len(self._peeked) <= offset:
            try:
                self._peeked.append(next(self.iter))
            except StopIteration:
                return None
        return self._peeked[offset]

    def __next__(self) -> T:
        if len(self._peeked) > 0:
            item = self._peeked.pop(0)
        else:
            item = next(self.iter)
        self.last_item = item
        return item

    def __iter__(self) -> Iterator[T]:
        return self

    def next(self) -> T:
        try:
            return next(self)
        except StopIteration:
            return None

    def has_next(self):
        return self.peek() is not None


class ParserIterator(PeekableIterator[Token]):
    def __init__(self, iterable: Iterable[Token]) -> None:
        # strip comment tokens before they ever reach the parser
        super().__init__(t for t in iterable if t.kind not in (TokenType.LineComment, TokenType.MultiComment))
        self.ignore_newline = False

    def peek(self, offset: int = 0):
        while len(self._peeked) <= offset:
            try:
                self._peeked.append(next(self.iter))
            except StopIteration:
                return None
        token = self._peeked[offset]
        if self.ignore_newline and token.kind == TokenType.EOL:
            return self.peek(offset=offset + 1)
        return token

    def __next__(self) -> Token:
        if len(self._peeked) > 0:
            item = self._peeked.pop(0)
        else:
            item = next(self.iter)
        self.last_item = item
        if self.ignore_newline and item.kind == TokenType.EOL:
            return next(self)
        return item
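
The iterator pair above is the parser's lookahead mechanism; a small sketch of the intended behaviour:

# Sketch: PeekableIterator allows arbitrary lookahead without consuming items.
it = PeekableIterator([1, 2, 3])
assert it.peek() == 1 and it.peek(1) == 2  # look ahead as far as needed
assert next(it) == 1                       # consumption still starts at the front
assert it.has_next()
assert it.next() == 2 and it.next() == 3
assert it.next() is None                   # exhausted: next() returns None, no raise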

@@ -0,0 +1,201 @@
from typing import Dict, List, Iterable, Generator, Tuple
from .errors import EndOfInputError, ParseError
from .defs import Span, LexingContext, Token, TokenType, digits, keywords, operators, integer_type_suffixes, parens, identifier_terminating_chars


class Lexer:
    separators = ':-+*/#!"\'=?%&<>[]{}()\n \t'
    parens = '[]{}()<>'

    context: LexingContext
    content: str
    pos: int
    line: int
    fname: str
    size: int

    def __init__(self, fname: str, context: LexingContext):
        self.content = context.sources[fname]
        self.fname = fname
        self.pos = self.line = 0
        self.word = ""
        self.size = len(self.content)
        self.context = context

    def peek(self, offset: int = 0):
        if self.pos + offset >= self.size:
            return None
        return self.content[self.pos + offset]

    def startswith(self, *patterns: str, offset: int = 0):
        # match longest first
        for pattern in sorted(patterns, key=len, reverse=True):
            if self.content.startswith(pattern, self.pos + offset):
                return pattern
        return False

    def read_until(self, pattern: str, inclusive=True) -> Tuple[str, Span]:
        start = self.pos
        pos = self.pos
        while pos < self.size and not self.content.startswith(pattern, pos):
            pos += 1
        if pos == self.size:
            raise EndOfInputError(Span(start, pos, self.fname, self.context), pattern)
        if inclusive:
            pos += len(pattern)
        self.pos = pos
        return self.content[start:pos], Span(start, pos, self.fname, self.context)

    def do_parse(self) -> Iterable[Token]:
        while True:
            c = self.peek()
            # reached end of input
            if c is None:
                yield Token(Span(self.pos, self.pos, self.fname, self.context), "", TokenType.EOI)
                break
            if c in '\n\r':
                start = self.pos
                if self.startswith('\r\n'):
                    self.pos += 1
                self.pos += 1
                yield self.Token(start, TokenType.EOL)
                continue
            # check for integer literals
            if self.startswith('0x', '0X', '0b', *'0123456789'):
                yield self.parse_integer()
                continue
            # check for parenthesis
            if c in parens:
                # left parens at position 0, 2, 4, 6
                left_paren = parens.index(c) % 2 == 0
                self.pos += 1
                yield self.Token(self.pos - 1, TokenType.LBracket if left_paren else TokenType.RBracket)
                continue
            if self.startswith('//'):
                start = self.pos
                self.read_until('\n', inclusive=False)  # read until newline, but don't consume newline
                yield self.Token(start, TokenType.LineComment)
                continue
            if self.startswith('/*'):
                start = self.pos
                self.read_until('*/')
                yield self.Token(start, TokenType.MultiComment)
                continue
            starts_with_keyword = self.startswith(*keywords)
            if starts_with_keyword:
                start = self.pos
                self.pos += len(starts_with_keyword)
                yield self.Token(start, TokenType.Keyword)
                self.consume_expected_whitespace()
                continue
            starts_with_operator = self.startswith(*operators)
            if starts_with_operator:
                start = self.pos
                self.pos += len(starts_with_operator)
                yield self.Token(start, TokenType.Operator)
                continue
            if c in '"\'':
                yield self.parse_string()
                continue
            if c in ' \t':
                self.pos += 1
                continue
            # must be an identifier then
            start = self.pos
            while self.peek() is not None and self.peek() not in identifier_terminating_chars:
                self.pos += 1
            if start == self.pos:
                raise ParseError("Expected identifier!", Span(start, start + 1, self.fname, self.context))
            yield self.Token(start, TokenType.Identifier)
            continue

    def consume_expected_whitespace(self):
        if self.peek() is None or self.peek() in '\r\n':
            return
        if self.peek() not in '\t ':
            raise ParseError("Expected whitespace here", Span(self.pos, self.pos + 1, self.fname, self.context))
        while self.peek() is not None and self.peek() in '\t ':
            self.pos += 1

    def parse_integer(self):
        start = self.pos
        if self.startswith('-'):
            self.pos += 1
        parse_type = 'dec'
        if self.startswith('0x', '0X'):
            parse_type = 'hex'
            self.pos += 2
        elif self.startswith('0b'):
            parse_type = 'bin'
            self.pos += 2
        while self.peek() is not None and self.peek() in digits[parse_type]:
            self.pos += 1
        suffix = self.startswith(*integer_type_suffixes)
        if suffix:
            self.pos += len(suffix)
        return self.Token(start, TokenType.Integer)

    def parse_string(self):
        start = self.pos
        terminator = self.peek()
        escaped = False
        self.pos += 1
        string = ""
        while escaped or self.peek() != terminator:
            char = self.peek()
            if char is None:
                raise EndOfInputError(Span(start, self.pos, self.fname, self.context), terminator)
            if escaped:
                match char:
                    case 'r':
                        string += '\r'
                    case 'b':
                        string += '\b'
                    case 'n':
                        string += '\n'
                    case 't':
                        string += '\t'
                    case 'e':  # support terminal escape codes
                        string += '\033'
                    case other:
                        string += '\\' + other
                escaped = False
            elif char == '\\':
                escaped = True
            else:
                string += char
            self.pos += 1
        # consume trailing terminator
        self.pos += 1
        return self.Token(start, TokenType.String, content=string)

    def Token(self, start: int, type: TokenType, end=None, content=None) -> Token:
        if end is None:
            end = self.pos
        if content is None:
            content = self.content[start:end]
        return Token(Span(start, end, self.fname, self.context), content, type)
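
A sketch of driving the Lexer on its own (module layout assumed as in the test script further down):

# Sketch: tokenize a single line and print the resulting token stream.
ctx = LexingContext(sources={'demo': 'const x = 0x2Au8\n'},
                    entrypoint='demo', structs={}, functions={})
for token in Lexer('demo', ctx).do_parse():
    print(token)  # Keyword, Identifier, Operator, IntegerLiteral, EOL, EOI
    if token.kind == TokenType.EOI:
        break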

@@ -0,0 +1,466 @@
from dataclasses import dataclass
from .defs import Token, IntegerLiteralToken, TokenType, OperatorToken, KeywordToken, IdentifierToken, Span
from .lexer import Lexer
from .helpers import ParserIterator
from .errors import CompilerError, EndOfInputError, InvalidTokenError, UnsupportedSyntaxError, print_warning
from typing import Tuple, Optional, List, Dict, Set, Iterable


@dataclass(frozen=True)
class Type:
    name: IdentifierToken | str
    wraps: Tuple['Type', ...]


@dataclass(frozen=True)
class Value:
    type: Optional[Type]
    value: 'ASTNode'


@dataclass(frozen=True)
class FunctionArgument:
    name: IdentifierToken
    type: Type
    #default_value: Value | None


@dataclass(frozen=True)
class ASTNode:
    pass


@dataclass(frozen=True)
class FunctionNode(ASTNode):
    name: IdentifierToken
    args: Tuple[FunctionArgument, ...]
    return_type: Type
    contents: Tuple[ASTNode, ...]


@dataclass(frozen=True)
class FunctionCallNode(ASTNode):
    function: Value
    arguments: List[Value]


@dataclass(frozen=True)
class VariableDeclarationNode(ASTNode):
    name: IdentifierToken
    modifiers: List[str]
    type: Type
    value: Value


@dataclass(frozen=True)
class ForLoopNode(ASTNode):
    variable_name: IdentifierToken | str
    iterator: Value
    body: List[ASTNode]


@dataclass(frozen=True)
class SpreadOperatorNode(ASTNode):
    left_side: Value
    right_side: Value
    type: Type | None


@dataclass(frozen=True)
class IntegerImmediateNode(ASTNode):
    value: int
    type: Type
    span: Span


@dataclass(frozen=True)
class StringImmediateNode(ASTNode):
    value: str
    type: Type
    span: Span


@dataclass(frozen=True)
class VariableNameNode(ASTNode):
    name: IdentifierToken


@dataclass(frozen=True)
class BracketetExpressionNode(ASTNode):
    content: ASTNode


@dataclass(frozen=True)
class UseStatement(ASTNode):
    path: List[IdentifierToken]


class Parser:
    """
    This class takes a lexed input and produces a syntax tree.

    It only validates syntax, but does no type checking etc...
    """
    types: Dict[str, Type]
    lexer: Lexer
    tokens: ParserIterator  # already specialized to Token

    def __init__(self, lexer: Lexer):
        self.variables = dict()
        self.lexer = lexer
        # strip comments from tokens
        self.tokens = ParserIterator(lexer.do_parse())

    def parse(self):
        body = []
        while True:
            thing = self.parse_file_level_block()
            if thing is None:
                return body
            body.append(thing)
            print(thing)

    def consume_next_token(self, types: Set[TokenType] | TokenType = None, content: str = None, msg: str = None):
        if not isinstance(types, set) and types is not None:
            types = {types}
        peeked = self.tokens.peek()
        if peeked is None:
            raise EndOfInputError(self.tokens.last_item.span, content)
        if types is not None and peeked.kind not in types:
            raise InvalidTokenError(peeked, (*types, content), msg)
        if content is not None and peeked.content != content:
            raise InvalidTokenError(peeked, {content}, msg)
        return self.tokens.next()

    def consume_optional_eol(self):
        """
        This function tries to consume EOL tokens, if they are available
        """
        while self.tokens.peek().kind == TokenType.EOL:
            self.tokens.next()

    def consume_expected_eol(self, msg):
        """
        This function consumes at least one EOL token, or fails
        """
        if self.tokens.peek().kind != TokenType.EOL:
            raise InvalidTokenError(self.tokens.peek(), expected_type=["\\n"], message=msg)
        while self.tokens.peek().kind == TokenType.EOL:
            self.tokens.next()

    def consume_optional(self, types: Set[TokenType] | TokenType = None, content: str = None, msg: str = None):
        try:
            return self.consume_next_token(types, content, msg)
        except InvalidTokenError:
            return False

    def parse_file_level_block(self) -> ASTNode | None:
        """
        File-level blocks are statements written at the file level.
        """
        # this part ignores newlines!
        prev_ignore_lvl = self.tokens.ignore_newline
        self.tokens.ignore_newline = True
        try:
            match self.tokens.peek():
                case KeywordToken(content="function"):
                    return self.parse_function_definition()
                case KeywordToken(content="struct"):
                    return self.parse_struct()
                case KeywordToken(content="const"):
                    return self.parse_const_declaration()
                case KeywordToken(content="use"):
                    return self.parse_import_statement()
                case Token(kind=TokenType.EOI):
                    return None
                case None:
                    raise Exception("Unexpected None token!")
                case unknown_token:
                    raise InvalidTokenError(unknown_token, ("function", "struct"), "Only function and struct declarations are allowed at file-level!")
        finally:
            self.tokens.ignore_newline = prev_ignore_lvl

    def parse_import_statement(self):
        """
        parse an import-equivalent statement:

        use std.String
        """
        self.consume_next_token(types=TokenType.Keyword, content="use")
        path = []
        if self.tokens.peek().kind == TokenType.String:
            raise UnsupportedSyntaxError(self.tokens.peek(), "file paths in use statements!")
        prev = self.tokens.ignore_newline
        self.tokens.ignore_newline = False
        while self.tokens.peek().kind != TokenType.EOL:
            path.append(self.consume_next_token(TokenType.Identifier))
            if self.tokens.peek().content == '.':
                self.consume_next_token(types=TokenType.Operator, content='.')
        self.consume_expected_eol("'use' statement must be terminated by EOL!")
        self.tokens.ignore_newline = prev
        return UseStatement(path)

    def parse_basic_block(self) -> Iterable[ASTNode]:
        """
        A "Basic Block" is a block inside a function, for loop, etc.
        """
        # when parsing blocks, newlines are important!
        prev_ignore_lvl = self.tokens.ignore_newline
        self.tokens.ignore_newline = False
        if prev_ignore_lvl:
            # consume all remaining EOLs
            self.consume_optional_eol()
        try:
            while True:
                match self.tokens.peek():
                    case KeywordToken(content="function"):
                        yield self.parse_function_definition()
                    case KeywordToken(content="const"):
                        yield self.parse_const_declaration()
                    case KeywordToken(content="let"):
                        raise UnsupportedSyntaxError(self.tokens.peek(), "'let' not supported yet")
                    case KeywordToken(content="for"):
                        yield self.parse_for_statement()
                    case KeywordToken(content="return"):
                        raise UnsupportedSyntaxError(self.tokens.peek(), "'return' not supported yet")
                    case KeywordToken(content="if"):
                        raise UnsupportedSyntaxError(self.tokens.peek(), "'if' not supported yet")
                    case KeywordToken(content="struct"):
                        # TODO: support
                        raise UnsupportedSyntaxError(self.tokens.peek(), "structs not supported yet")
                    case Token(kind=TokenType.RBracket, content="}"):
                        break
                    case other:
                        yield self.parse_value()
                        self.consume_expected_eol(msg="Only one statement per line permitted!")
        finally:
            self.tokens.ignore_newline = prev_ignore_lvl

    def parse_function_definition(self):
        """
        Parses a function definition including the body
        """
        self.tokens.next()
        function_name = self.consume_next_token(types=TokenType.Identifier, msg="'function' keyword must be followed by identifier!")
        # consume parenthesis
        self.consume_next_token(types=TokenType.LBracket, content="(", msg="A function declaration must contain a list of arguments enclosed in parenthesis!")
        args = []
        # TODO: we actually want to match against Token(kind=TokenType.RParen, content=")")
        while self.tokens.peek().content != ')':
            args.append(self.parse_function_def_arg())
            self.consume_optional_eol()
            if not self.consume_optional(content=','):
                break
            self.consume_optional_eol()
        self.consume_next_token(types=TokenType.RBracket, content=")", msg="Expected ')' at the end of function argument list!")
        if self.tokens.peek().content == '->':
            raise UnsupportedSyntaxError(self.tokens.peek(), "Function return type annotations are not yet supported!")
        if self.tokens.peek().content == '=>':
            raise UnsupportedSyntaxError(self.tokens.peek(), "Short function body notation not yet supported!")
        self.consume_next_token(types=TokenType.LBracket, content="{")
        content = list(self.parse_basic_block())
        self.consume_next_token(types=TokenType.RBracket, content="}", msg="Expected '}' at the end of a function body!")
        return FunctionNode(function_name, args, None, content)

    def parse_function_def_arg(self) -> FunctionArgument:
        """
        Parse a single argument of a function.

        Currently this allows name: type
        In the future we want to also support name: type = value
        """
        identifier = self.consume_next_token(types=TokenType.Identifier, msg="Function argument name expected!")
        self.consume_next_token(types=TokenType.Operator, content=":", msg="Function argument name must be followed by a colon ':' and a type definition!")
        type = self.parse_type()
        if self.tokens.peek().content == '=':
            raise UnsupportedSyntaxError(self.tokens.peek(), "default values for function arguments")
        return FunctionArgument(identifier, type)

    def parse_type(self) -> Type:
        """
        Parse a type declaration, such as String, i64, or Vector<i64>
        """
        main_type = self.consume_next_token(types=TokenType.Identifier, msg="Expected type name!")
        # if this type does not wrap any other types, we are done!
        if self.tokens.peek().content != '<':
            return Type(main_type, [])
        wrapped_types = []
        start_token = self.consume_next_token(content="<")
        while self.tokens.peek().content != '>':
            wrapped_types.append(self.parse_type())
            if not self.consume_optional(content=","):
                break
            self.consume_optional_eol()
        self.consume_next_token(content=">", msg="Error while parsing list of wrapped types, expected '>' at the end of the type list!")
        if len(wrapped_types) == 0:
            print_warning(self.tokens.last_item.span.union(start_token.span), "Empty set of type arguments!")
        return Type(main_type, wrapped_types)

    def parse_const_declaration(self):
        """
        parse a const declaration, so basically

        const name: type = value
        """
        self.consume_next_token(types=TokenType.Keyword, content="const")
        identifier = self.consume_next_token(types=TokenType.Identifier, msg="const keywords must be immediately followed by a variable name!")
        type = None
        if self.tokens.peek().content == ':':
            self.consume_next_token(content=':')
            type = self.parse_type()
        self.consume_next_token(content='=', msg="Expected '=' in const declaration!")
        value = self.parse_value()
        self.consume_expected_eol("Const declaration statement must be terminated by a newline!")
        return VariableDeclarationNode(identifier, ['const'], type, Value(None, value))

    def parse_value(self) -> ASTNode:
        """
        This function parses a "value", so basically any statement that evaluates to a value

        This can be a literal, a function call, an array/struct constructor, etc.
        """
        # handle bracketed expression
        if self.tokens.peek().content == '(':
            self.consume_next_token(content='(')
            self.consume_optional_eol()
            value = self.parse_value()
            self.consume_optional_eol()
            self.consume_next_token(content=')', msg="Expected closing bracket")
        else:
            value = self._inner_parse_value()
        match self.tokens.peek():
            case OperatorToken(content='..'):
                self.consume_next_token(types=TokenType.Operator, content="..")
                right_hand_type = self.parse_type()
                return SpreadOperatorNode(value, right_hand_type, None)
            #case OperatorToken(content):
            #    raise UnsupportedSyntaxError(self.tokens.peek(), f"'{content}' is not implemented yet!")
            case Token(kind=TokenType.LBracket, content="("):
                self.consume_next_token(content="(")
                self.consume_optional_eol()
                args = list(self.parse_inner_function_call_args())
                self.consume_next_token(content=')', msg="")
                return FunctionCallNode(value, args)
            case _:
                return value

    def _inner_parse_value(self) -> ASTNode:
        match self.tokens.peek():
            case IntegerLiteralToken(value=value, suffix=suffix, span=span):
                if suffix:
                    type = Type(suffix, [])
                else:
                    # assume widest signed integer type available
                    type = Type('i64', [])
                self.consume_next_token()
                return IntegerImmediateNode(value, type, span)
            case Token(kind=TokenType.String, span=span, content=content):
                self.consume_next_token()
                return StringImmediateNode(content, Type("String", []), span)
            case Token(content="{", kind=TokenType.LBracket):
                return self.parse_structured_value()
            case IdentifierToken():
                return VariableNameNode(self.consume_next_token(TokenType.Identifier))
            case other:
                raise UnsupportedSyntaxError(other, "This type of value is not implemented yet!")

    def parse_inner_function_call_args(self) -> Iterable[ASTNode]:
        while True:
            self.consume_optional_eol()
            # tolerate empty argument lists and trailing commas
            if self.tokens.peek().content == ')':
                return
            yield self.parse_value()
            self.consume_optional_eol()
            if self.tokens.peek().content == ',':
                self.consume_next_token(content=",")
                continue
            if self.tokens.peek().content == ')':
                break

    def parse_structured_value(self) -> ASTNode:
        """
        parse either a list or struct initializer:

        list initializer:
            const data: Vector<i64> = {1,2,3,4,5}

        struct initializer:
            const data: MyStruct = {
                field1: 1
                field2: "Hello World"
                arrayField: {"test", "123", "these are strings"}
            }
        """
        raise UnsupportedSyntaxError(self.tokens.peek(), "Structured values such as lists, dictionaries and structs!")

    def parse_for_statement(self) -> ForLoopNode:
        self.consume_next_token(content='for', types=TokenType.Keyword)
        loop_variable_name = self.consume_next_token(types=TokenType.Identifier, msg="Name of the loop variable expected!")
        self.consume_next_token(types=TokenType.Keyword, content="in", msg="for <name> in <value> format required!")
        iterator = self.parse_value()
        self.consume_next_token(content='{')
        self.consume_optional_eol()
        body = list(self.parse_basic_block())
        self.consume_optional_eol()
        self.consume_next_token(content='}')
        self.consume_optional_eol()
        return ForLoopNode(loop_variable_name, iterator, body)
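
Since every AST node is a frozen dataclass, downstream passes can use structural pattern matching; a sketch (the describe helper is hypothetical, not part of the commit):

# Sketch: summarizing parsed file-level nodes via match statements,
# e.g. [describe(n) for n in Parser(lex).parse()]
def describe(node: ASTNode) -> str:
    match node:
        case FunctionNode(name=name, args=args):
            return "function {}({} args)".format(name.content, len(args))
        case VariableDeclarationNode(name=name, modifiers=modifiers):
            return "{} {}".format(" ".join(modifiers), name.content)
        case UseStatement(path=path):
            return "use " + ".".join(tok.content for tok in path)
        case _:
            return type(node).__name__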

@@ -0,0 +1,21 @@
/*struct something {
    i64 i
    Array<i64> data
    private i64 total
}*/

use std.string

function main(args: Array<String>) {
    const cars = 100

    for x in 0x01..cars {
        print("{} cars are driving around the block", cars,)
    }
}

@@ -0,0 +1,2 @@
#pragma once

@@ -0,0 +1,25 @@
from compiler.errors import *
from compiler.lexer import Lexer, LexingContext
from compiler.parser import Parser
import os

fname = os.path.abspath('./example.pmp')

c = LexingContext(dict(), fname, dict(), dict())

try:
    with open(fname, 'r') as f:
        c.sources[fname] = f.read()

    lex = Lexer(fname, c)
    #for token in lex.do_parse():
    #    print(token)
    a = Parser(lex)
    elems = a.parse()
except CompilerError as err:
    err.print_context_message()

@@ -0,0 +1,25 @@
use std.random randint

const ALPHABET = 26

function main() {
    // generate a lot of words
    map const x in 0..10000 into words {
        let word = 0
        let len = 0
        let char = randint(ALPHABET)

        while char != 0 {
            word *= ALPHABET
            word += char
            len++
            char = randint(ALPHABET)
        }

        yield word
    }
}