""" RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ import re from typing import Dict, Tuple, Iterable, Callable, List from .assembler import MemorySectionType, ParseContext, AssemblerDirectives from .colors import FMT_PARSE from .helpers import Peekable from .tokenizer import Token, TokenType, tokenize from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction from .types.exceptions import ParseException def parse_instruction(token: Token, args: Tuple[str], context: ParseContext): if context.section is None: context.new_section(".text", MemorySectionType.Instructions) if context.section.type != MemorySectionType.Instructions: raise ParseException( "{} {} encountered in invalid context: {}".format(token, args, context) ) ins = SimpleInstruction( token.value, args, context.context, context.current_address() ) context.section.data.append(ins) def parse_label(token: Token, args: Tuple[str], context: ParseContext): name = token.value[:-1] if re.match(r"^\d+$", name): # relative label: context.context.numbered_labels[name].append(context.current_address()) else: if name in context.context.labels: print(FMT_PARSE + "Warn: Symbol {} defined twice!".format(name)) context.add_label(name, context.current_address(), is_relative=True) PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = { TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction, TokenType.LABEL: parse_label, TokenType.INSTRUCTION_NAME: parse_instruction, } def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program: """ Convert a token stream into a parsed program :param name: the programs name :param tokens_iter: the programs content, tokenized :return: a parsed program """ context = ParseContext(name) for token, args in composite_tokenizer(Peekable[Token](tokens_iter)): if token.type not in PARSERS: raise ParseException("Unexpected token type: {}, {}".format(token, args)) PARSERS[token.type](token, args, context) return context.finalize() 
def composite_tokenizer(
    tokens_iter: Iterable[Token],
) -> Iterable[Tuple[Token, Tuple[str]]]:
    """
    Convert an iterator over tokens into an iterator over tuples: (token, tuple(arguments))

    The first token is either a pseudo_op, label, or instruction name. The arguments
    are the values of the argument tokens that follow it, as collected by
    take_arguments. Tokens of any other type found at the start of a statement
    are skipped silently.

    :param tokens_iter: An iterator over tokens
    :return: An iterator over a slightly more structured representation of the tokens
    """
    tokens: Peekable[Token] = Peekable[Token](tokens_iter)

    while not tokens.is_empty():
        token = next(tokens)
        if token.type in (
            TokenType.PSEUDO_OP,
            TokenType.LABEL,
            TokenType.INSTRUCTION_NAME,
        ):
            yield token, tuple(take_arguments(tokens))


def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
    """
    Consume the argument list of one statement and yield each argument's value.

    Argument tokens are yielded, comma tokens are skipped. Collection stops when:

    - a newline token is reached (the newline is consumed),
    - a token of any other type is reached (that token is left in the stream), or
    - the token stream is exhausted.

    Malformed argument lists (e.g. missing or doubled commas) are not rejected;
    no ParseException is raised here.

    :param tokens: A Peekable iterator over some Tokens
    :return: An iterator over the argument values
    """
    # Stop cleanly when the stream ends without a terminating newline instead
    # of dereferencing the result of peek() on an exhausted stream.
    while not tokens.is_empty():
        kind = tokens.peek().type
        if kind == TokenType.ARGUMENT:
            yield next(tokens).value
        elif kind == TokenType.NEWLINE:
            next(tokens)
            return
        elif kind == TokenType.COMMA:
            next(tokens)
        else:
            # NOTE: an unexpected token ends the argument list without an
            # error; it stays in the stream for the caller to handle.
            return


class AssemblyFileLoader(ProgramLoader):
    """
    This class loads assembly files written by hand. It understands some assembler directives and supports most
    pseudo instructions. It does very little verification of source correctness.

    It also supports numbered jump targets and properly supports local and global scope (.globl assembly directive)

    The AssemblyFileLoader loads .asm, .S and .s files by default, and acts as a weak fallback to all other filetypes.
    """

    def parse(self) -> Program:
        """
        Parse the file at ``self.source_path``.

        :return: the parsed program
        """
        # parse_tokens consumes the token stream completely before returning,
        # so the file is still open while it is being tokenized.
        with open(self.source_path, "r") as f:
            return parse_tokens(self.filename, tokenize(f))

    def parse_io(self, io):
        """
        Parse assembly source from an already opened stream.

        :param io: the object handed to the tokenizer (presumably an iterable of lines -- confirm against tokenize)
        :return: the parsed program
        """
        return parse_tokens(self.filename, tokenize(io))

    @classmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Rate how well this loader fits the given file.

        It also acts as a weak fallback if no other loaders want to take the file.

        :param source_path: the path to the source file
        :return: 1 if the text after the last "." is "asm", "S" or "s", otherwise 0.01
        """
        # gcc recognizes these file extensions as assembly. So we will do too.
        if source_path.split(".")[-1] in ("asm", "S", "s"):
            return 1
        return 0.01

    @classmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
        """
        Extract loader options from the argument list.

        This loader takes no options.

        :param argv: the argument list
        :return: the unchanged argument list and an empty options dict
        """
        return argv, {}