|
|
|
"""
|
|
|
|
RiscEmu (c) 2021 Anton Lydike
|
|
|
|
|
|
|
|
SPDX-License-Identifier: MIT
|
|
|
|
"""
|
|
|
|
import re
|
|
|
|
from typing import Dict, Tuple, Iterable, Callable, List
|
|
|
|
|
|
|
|
from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
|
|
|
|
from .colors import FMT_PARSE
|
|
|
|
from .helpers import Peekable
|
|
|
|
from .tokenizer import Token, TokenType, tokenize
|
|
|
|
from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction
|
|
|
|
from .types.exceptions import ParseException
|
|
|
|
|
|
|
|
|
|
|
|
def parse_instruction(token: Token, args: Tuple[str], context: ParseContext):
    """
    Append a single instruction to the section that is currently being parsed.

    If no section has been opened yet, a ".text" section of type
    MemorySectionType.Instructions is created before the instruction is added.

    :param token: the instruction name token; its value is used as the instruction name
    :param args: the instruction arguments, handed to SimpleInstruction unchanged
    :param context: the parse context that owns the current section
    :raises ParseException: if the current section is not an instruction section
    """
    # Instructions that appear before any section was opened implicitly start ".text".
    if context.section is None:
        context.new_section(".text", MemorySectionType.Instructions)

    section = context.section
    if section.type != MemorySectionType.Instructions:
        message = "{} {} encountered in invalid context: {}".format(
            token, args, context
        )
        raise ParseException(message)

    section.data.append(
        SimpleInstruction(
            token.value, args, context.context, context.current_address()
        )
    )
|
|
|
|
|
|
|
|
|
|
|
|
def parse_label(token: Token, args: Tuple[str], context: ParseContext):
    """
    Register the label described by ``token`` at the current address.

    The label name is ``token.value`` without its last character. Names that
    consist of digits only are appended to ``context.context.numbered_labels``
    under that name; every other name is registered through
    ``context.add_label`` with ``is_relative=True``. A warning is printed when
    such a name is already present in ``context.context.labels``.

    :param token: the label token
    :param args: unused, present so the function matches the PARSERS handler signature
    :param context: the parse context that receives the label
    """
    label_name = token.value[:-1]

    if re.match(r"^\d+$", label_name) is not None:
        # numbered label: only record the address under its number
        targets = context.context.numbered_labels[label_name]
        targets.append(context.current_address())
        return

    already_defined = label_name in context.context.labels
    if already_defined:
        print(FMT_PARSE + "Warn: Symbol {} defined twice!".format(label_name))
    context.add_label(label_name, context.current_address(), is_relative=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Dispatch table used by parse_tokens: maps the type of the token that starts a
# statement to the handler that is called with (token, args, context).
PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {
    # assembler directives
    TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction,
    # label definitions
    TokenType.LABEL: parse_label,
    # instructions
    TokenType.INSTRUCTION_NAME: parse_instruction,
}
|
|
|
|
|
|
|
|
|
|
|
|
def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
    """
    Build a program from a stream of tokens.

    The tokens are grouped into (token, args) statements by composite_tokenizer
    and each statement is handed to the handler registered in PARSERS for the
    type of its first token.

    :param name: the name of the program
    :param tokens_iter: the tokenized content of the program
    :return: the result of finalizing the parse context
    :raises ParseException: if a statement starts with a token type that has no handler
    """
    context = ParseContext(name)
    statements = composite_tokenizer(Peekable[Token](tokens_iter))

    for head, arguments in statements:
        handler = PARSERS.get(head.type)
        if handler is None:
            raise ParseException(
                "Unexpected token type: {}, {}".format(head, arguments)
            )
        handler(head, arguments, context)

    return context.finalize()
|
|
|
|
|
|
|
|
|
|
|
|
def composite_tokenizer(
    tokens_iter: Iterable[Token],
) -> Iterable[Tuple[Token, Tuple[str]]]:
    """
    Group a flat token stream into statements of the form (token, args).

    The first element is a pseudo_op, label, or instruction name token. The
    second element is a tuple with the argument values collected by
    take_arguments for that statement. Tokens of any other type found where a
    statement is expected to start are skipped.

    :param tokens_iter: An iterator over tokens
    :return: An iterator over a slightly more structured representation of the tokens
    """
    stream: Peekable[Token] = Peekable[Token](tokens_iter)
    statement_starts = (
        TokenType.PSEUDO_OP,
        TokenType.LABEL,
        TokenType.INSTRUCTION_NAME,
    )

    while not stream.is_empty():
        head = next(stream)
        if head.type not in statement_starts:
            continue
        yield head, tuple(take_arguments(stream))
|
|
|
|
|
|
|
|
|
|
|
|
def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
    """
    Yield the value of every argument token up to the end of the current line.

    Comma tokens are consumed and ignored; it is not checked that arguments and
    commas alternate. A newline token ends the argument list and is consumed.
    Any other token type also ends the argument list, but is left in the stream.

    :param tokens: A Peekable iterator over some Tokens
    """
    while True:
        upcoming = tokens.peek().type
        if upcoming == TokenType.ARGUMENT:
            yield next(tokens).value
        elif upcoming == TokenType.NEWLINE:
            next(tokens)
            return
        elif upcoming == TokenType.COMMA:
            next(tokens)
        else:
            # NOTE: an unexpected token silently ends the argument list here;
            # raising a ParseException ("Expected newline, instead got ...")
            # at this point is currently disabled.
            return
|
|
|
|
|
|
|
|
|
|
|
|
class AssemblyFileLoader(ProgramLoader):
    """
    This class loads assembly files written by hand. It understands some assembler directives and supports most
    pseudo instructions. It does very little verification of source correctness.

    It also supports numbered jump targets and properly supports local and global scope (.globl assembly directive)

    The AssemblyFileLoader loads .asm, .S and .s files by default, and acts as a weak fallback to all other filetypes.
    """

    def parse(self) -> Program:
        """
        Open the file at ``self.source_path`` in text mode and parse it.

        :return: the program produced by parse_tokens, named ``self.filename``
        """
        with open(self.source_path, "r") as f:
            return parse_tokens(self.filename, tokenize(f))

    def parse_io(self, io):
        """
        Parse a program from an already opened source instead of a path.

        :param io: the source handed to tokenize (presumably a text stream or
                   an iterable of lines; confirm against tokenize)
        :return: the program produced by parse_tokens, named ``self.filename``
        """
        return parse_tokens(self.filename, tokenize(io))

    @classmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Rate how well this loader is suited to parse the given file.

        Paths ending in ".asm", ".S" or ".s" are rated 1.0. Every other path is
        rated 0.01, so this loader also acts as a weak fallback if no other
        loaders want to take the file.

        :param source_path: the path to the source file
        :return: 1.0 for a recognized assembly file extension, 0.01 otherwise
        """
        # gcc recognizes these file extensions as assembly. So we will do too.
        # The suffix must include the dot, so that a path without any extension
        # (e.g. a file that is simply called "s") is not mistaken for assembly.
        if source_path.endswith((".asm", ".S", ".s")):
            return 1.0
        return 0.01

    @classmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
        """
        Extract loader specific options from the command line arguments.

        This loader takes no options: the arguments are returned unchanged,
        together with an empty options dict.

        :param argv: the command line arguments
        :return: a tuple of (remaining arguments, parser options)
        """
        return argv, {}
|