riscemu/riscemu/parser.py


"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import re
from typing import Dict, Tuple, Iterable, Callable, List
from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
from .colors import FMT_PARSE
from .helpers import Peekable
from .tokenizer import Token, TokenType, tokenize
from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction
from .types.exceptions import ParseException


def parse_instruction(token: Token, args: Tuple[str], context: ParseContext):
    if context.section is None:
        context.new_section(".text", MemorySectionType.Instructions)
    if context.section.type != MemorySectionType.Instructions:
        raise ParseException(
            "{} {} encountered in invalid context: {}".format(token, args, context)
        )
    ins = SimpleInstruction(
        token.value, args, context.context, context.current_address()
    )
    context.section.data.append(ins)


def parse_label(token: Token, args: Tuple[str], context: ParseContext):
    name = token.value[:-1]
    if re.match(r"^\d+$", name):
        # relative label:
        context.context.numbered_labels[name].append(context.current_address())
    else:
        if name in context.context.labels:
            print(FMT_PARSE + "Warn: Symbol {} defined twice!".format(name))
        context.add_label(name, context.current_address(), is_relative=True)
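

# PARSERS maps the token type that starts a source line to its handler: assembler
# directives such as ".text" go to AssemblerDirectives.handle_instruction, labels
# such as "main:" to parse_label, and instruction mnemonics such as "addi" to
# parse_instruction.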
PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {
    TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction,
    TokenType.LABEL: parse_label,
    TokenType.INSTRUCTION_NAME: parse_instruction,
}


def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
    """
    Convert a token stream into a parsed program

    :param name: the program's name
    :param tokens_iter: the program's content, tokenized
    :return: a parsed program
    """
    context = ParseContext(name)

    for token, args in composite_tokenizer(Peekable[Token](tokens_iter)):
        if token.type not in PARSERS:
            raise ParseException("Unexpected token type: {}, {}".format(token, args))
        PARSERS[token.type](token, args, context)

    return context.finalize()
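

# A minimal usage sketch (assumption: tokenize() accepts any iterable of lines, as
# it is fed an open file object in AssemblyFileLoader.parse below; "example.s" is a
# hypothetical name attached to the resulting Program):
#
#   import io
#   source = io.StringIO(".text\naddi a0, zero, 1\n")
#   program = parse_tokens("example.s", tokenize(source))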


def composite_tokenizer(
    tokens_iter: Iterable[Token],
) -> Iterable[Tuple[Token, Tuple[str]]]:
    """
    Convert an iterator over tokens into an iterator over tuples: (token, tuple(args))

    The first token is either a pseudo_op, label, or instruction name. The args tuple
    holds the values of all remaining tokens before a newline is encountered.

    :param tokens_iter: An iterator over tokens
    :return: An iterator over a slightly more structured representation of the tokens
    """
    tokens: Peekable[Token] = Peekable[Token](tokens_iter)

    while not tokens.is_empty():
        token = next(tokens)
        if token.type in (
            TokenType.PSEUDO_OP,
            TokenType.LABEL,
            TokenType.INSTRUCTION_NAME,
        ):
            yield token, tuple(take_arguments(tokens))


def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
    """
    Consumes (argument comma)* and yields argument.value until a newline is reached

    The newline at the end is consumed. If an argument is followed by neither a
    newline nor a comma, argument parsing simply stops (the ParseException for this
    case is currently commented out below).

    :param tokens: A Peekable iterator over some Tokens
    """
    while True:
        if tokens.peek().type == TokenType.ARGUMENT:
            yield next(tokens).value
        elif tokens.peek().type == TokenType.NEWLINE:
            next(tokens)
            break
        elif tokens.peek().type == TokenType.COMMA:
            next(tokens)
        else:
            break
            # raise ParseException("Expected newline, instead got {}".format(tokens.peek()))
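

# Illustration (not verified against the tokenizer's exact token values): for a
# source line such as
#   addi a0, zero, 1
# composite_tokenizer() is expected to yield the INSTRUCTION_NAME token for "addi"
# paired with the argument tuple ("a0", "zero", "1"); take_arguments() consumes the
# separating commas and the trailing newline.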


class AssemblyFileLoader(ProgramLoader):
    """
    This class loads assembly files written by hand. It understands some assembler
    directives and supports most pseudo instructions. It does very little
    verification of source correctness.

    It also supports numbered jump targets and properly supports local and global
    scope (.globl assembly directive).

    The AssemblyFileLoader loads .asm, .S and .s files by default, and acts as a
    weak fallback for all other file types.
    """

    def parse(self) -> Program:
        with open(self.source_path, "r") as f:
            return parse_tokens(self.filename, tokenize(f))

    def parse_io(self, io):
        return parse_tokens(self.filename, tokenize(io))

    @classmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Reports a strong match for .asm, .S and .s files, and acts as a weak
        fallback if no other loaders want to take the file.

        :param source_path: the path to the source file
        :return: 1 for recognized assembly file extensions, 0.01 otherwise
        """
        # gcc recognizes these file extensions as assembly, so we do too
        if source_path.split(".")[-1] in ("asm", "S", "s"):
            return 1
        return 0.01
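
    # e.g. can_parse("program.s") returns 1, while can_parse("notes.txt") returns
    # 0.01, the weak-fallback score.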

    @classmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
        return argv, {}
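

# Minimal manual smoke test (a sketch; the default path "example.s" is hypothetical).
# Because this module uses relative imports, run it as a module, e.g.
# `python -m riscemu.parser path/to/file.s`.
if __name__ == "__main__":
    import sys

    path = sys.argv[1] if len(sys.argv) > 1 else "example.s"
    with open(path, "r") as f:
        print(parse_tokens(path, tokenize(f)))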