You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
riscemu/riscemu/parser.py

128 lines
4.7 KiB
Python

"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import re
from typing import Dict, Tuple, Iterable, Callable, List
from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
from .colors import FMT_PARSE
from .helpers import Peekable
from .tokenizer import Token, TokenType, tokenize
from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction
from .types.exceptions import ParseException
def parse_instruction(token: Token, args: Tuple[str], context: ParseContext):
    """
    Append a single instruction to the section that is currently being assembled.

    :param token: the instruction-name token; its value is used as the instruction name
    :param args: the argument strings that followed the instruction name
    :param context: the current parse context; its active section receives the instruction
    :raises ParseException: if no section is active, or the active section is not an
        instruction section
    """
    section = context.section
    misplaced = section is None or section.type != MemorySectionType.Instructions
    if misplaced:
        raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context))

    # The instruction records the address it will occupy, taken before it is appended.
    instruction = SimpleInstruction(token.value, args, context.context, section.current_address())
    section.data.append(instruction)
def parse_label(token: Token, args: Tuple[str], context: ParseContext):
    """
    Register a label at the current address of the active section.

    Label names consisting only of digits are recorded as numbered labels (the same number may
    be defined several times); every other name is added through ``context.add_label``. A
    warning is printed when a named label is already present, but the label is still added.

    :param token: the label token; its value minus the last character is the label name
        (presumably the trailing ':' -- confirm against the tokenizer)
    :param args: unused, present so all parser callbacks share one signature
    :param context: the current parse context
    """
    label = token.value[:-1]

    if re.match(r'^\d+$', label):
        # numbered label: collect every address this number is defined at
        context.context.numbered_labels[label].append(context.section.current_address())
        return

    if label in context.context.labels:
        print(FMT_PARSE + 'Warn: Symbol {} defined twice!'.format(label))
    context.add_label(label, context.section.current_address(), is_relative=True)
# Dispatch table used by parse_tokens: maps the type of the leading token of a line to the
# callback that handles it. Every callback takes (token, args, context) and returns nothing.
PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {
    TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction,
    TokenType.LABEL: parse_label,
    TokenType.INSTRUCTION_NAME: parse_instruction,
}
def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
    """
    Build a program from a stream of tokens.

    The stream is grouped into (leading token, arguments) pairs by composite_tokenizer and each
    pair is handed to the callback registered for the leading token's type in PARSERS.

    :param name: the name of the program
    :param tokens_iter: the tokenized content of the program
    :return: the result of finalizing the parse context
    :raises ParseException: if a leading token has a type with no registered parser
    """
    ctx = ParseContext(name)
    grouped = composite_tokenizer(Peekable[Token](tokens_iter))

    for head, arguments in grouped:
        handler = PARSERS.get(head.type)
        if handler is None:
            raise ParseException("Unexpected token type: {}, {}".format(head, arguments))
        handler(head, arguments, ctx)

    return ctx.finalize()
def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]:
    """
    Group a flat token stream into (leading token, arguments) pairs.

    The leading token is a pseudo-op, a label or an instruction name. The arguments are the
    values collected by take_arguments from the tokens that follow it, as a tuple of strings.
    Tokens of any other type that appear where a leading token is expected are skipped.

    :param tokens_iter: an iterator over tokens
    :return: an iterator over (token, tuple of argument values)
    """
    stream = Peekable[Token](tokens_iter)
    leading_types = (TokenType.PSEUDO_OP, TokenType.LABEL, TokenType.INSTRUCTION_NAME)

    while not stream.is_empty():
        head = next(stream)
        if head.type not in leading_types:
            continue
        yield head, tuple(take_arguments(stream))
def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
    """
    Consume the arguments of one line and yield the value of each argument token.

    Commas are consumed and dropped. A newline is consumed and ends the argument list. Any
    other token also ends the argument list, but is left in the stream; no exception is raised
    in that case.

    :param tokens: a Peekable iterator over tokens
    :return: an iterator over the argument values
    """
    while True:
        # NOTE(review): assumes peek() returns a token here, i.e. the stream does not run out
        # in the middle of a line -- confirm against Peekable and the tokenizer.
        kind = tokens.peek().type

        if kind == TokenType.ARGUMENT:
            yield next(tokens).value
            continue
        if kind == TokenType.COMMA:
            next(tokens)
            continue
        if kind == TokenType.NEWLINE:
            # the terminating newline belongs to this line, so it is consumed as well
            next(tokens)
        return
class AssemblyFileLoader(ProgramLoader):
    """
    This class loads assembly files written by hand. It understands some assembler directives and supports most
    pseudo instructions. It does very little verification of source correctness.

    It also supports numbered jump targets and properly supports local and global scope (.globl assembly directive)

    The AssemblyFileLoader loads .asm, .S and .s files by default, and acts as a weak fallback to all other filetypes.
    """

    def parse(self) -> Program:
        """
        Tokenize and parse the file at ``self.source_path``.

        :return: the parsed program, named after ``self.filename``
        """
        # parse_tokens consumes the token stream completely before the file is closed
        with open(self.source_path, 'r') as f:
            return parse_tokens(self.filename, tokenize(f))

    @classmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Rate how confident this loader is that it can parse the given file.

        It also acts as a weak fallback if no other loaders want to take the file.

        :param source_path: the path to the source file
        :return: 1 if the path ends in one of the extensions .asm, .S or .s, otherwise 0.01
        """
        # gcc recognizes these file endings as assembly. So we will do too.
        # rpartition (rather than split) lets us require an actual '.', so that a path without
        # any dot, e.g. a file simply called "asm", is not mistaken for a known extension.
        _, dot, extension = source_path.rpartition('.')
        if dot and extension in ('asm', 'S', 's'):
            return 1
        return 0.01

    @classmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
        """
        Extract loader-specific options from the command line arguments.

        This loader takes no options, so the arguments are handed back untouched.

        :param argv: the command line arguments
        :return: a tuple of (the unchanged arguments, an empty options dict)
        """
        return argv, {}