You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
128 lines
4.7 KiB
Python
128 lines
4.7 KiB
Python
"""
|
|
RiscEmu (c) 2021 Anton Lydike
|
|
|
|
SPDX-License-Identifier: MIT
|
|
"""
|
|
import re
|
|
from typing import Dict, Tuple, Iterable, Callable, List
|
|
|
|
from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
|
|
from .colors import FMT_PARSE
|
|
from .helpers import Peekable
|
|
from .tokenizer import Token, TokenType, tokenize
|
|
from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction
|
|
from .types.exceptions import ParseException
|
|
|
|
|
|
def parse_instruction(token: Token, args: Tuple[str], context: ParseContext):
|
|
if context.section is None or context.section.type != MemorySectionType.Instructions:
|
|
raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context))
|
|
ins = SimpleInstruction(token.value, args, context.context, context.section.current_address())
|
|
context.section.data.append(ins)
|
|
|
|
|
|
def parse_label(token: Token, args: Tuple[str], context: ParseContext):
|
|
name = token.value[:-1]
|
|
if re.match(r'^\d+$', name):
|
|
# relative label:
|
|
context.context.numbered_labels[name].append(context.section.current_address())
|
|
else:
|
|
if name in context.context.labels:
|
|
print(FMT_PARSE + 'Warn: Symbol {} defined twice!'.format(name))
|
|
context.add_label(name, context.section.current_address(), is_relative=True)
|
|
|
|
|
|
PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {
|
|
TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction,
|
|
TokenType.LABEL: parse_label,
|
|
TokenType.INSTRUCTION_NAME: parse_instruction
|
|
}
|
|
|
|
|
|
def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
|
|
"""
|
|
Convert a token stream into a parsed program
|
|
:param name: the programs name
|
|
:param tokens_iter: the programs content, tokenized
|
|
:return: a parsed program
|
|
"""
|
|
context = ParseContext(name)
|
|
|
|
for token, args in composite_tokenizer(Peekable[Token](tokens_iter)):
|
|
if token.type not in PARSERS:
|
|
raise ParseException("Unexpected token type: {}, {}".format(token, args))
|
|
PARSERS[token.type](token, args, context)
|
|
|
|
return context.finalize()
|
|
|
|
|
|
def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]:
|
|
"""
|
|
Convert an iterator over tokens into an iterator over tuples: (token, list(token))
|
|
|
|
The first token ist either a pseudo_op, label, or instruction name. The token list are all remaining tokens before
|
|
a newline is encountered
|
|
:param tokens_iter: An iterator over tokens
|
|
:return: An iterator over a slightly more structured representation of the tokens
|
|
"""
|
|
tokens: Peekable[Token] = Peekable[Token](tokens_iter)
|
|
|
|
while not tokens.is_empty():
|
|
token = next(tokens)
|
|
if token.type in (TokenType.PSEUDO_OP, TokenType.LABEL, TokenType.INSTRUCTION_NAME):
|
|
yield token, tuple(take_arguments(tokens))
|
|
|
|
|
|
def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
|
|
"""
|
|
Consumes (argument comma)* and yields argument.value until newline is reached
|
|
If an argument is not followed by either a newline or a comma, a parse exception is raised
|
|
The newline at the end is consumed
|
|
:param tokens: A Peekable iterator over some Tokens
|
|
"""
|
|
while True:
|
|
if tokens.peek().type == TokenType.ARGUMENT:
|
|
yield next(tokens).value
|
|
elif tokens.peek().type == TokenType.NEWLINE:
|
|
next(tokens)
|
|
break
|
|
elif tokens.peek().type == TokenType.COMMA:
|
|
next(tokens)
|
|
else:
|
|
break
|
|
|
|
# raise ParseException("Expected newline, instead got {}".format(tokens.peek()))
|
|
|
|
|
|
class AssemblyFileLoader(ProgramLoader):
|
|
"""
|
|
This class loads assembly files written by hand. It understands some assembler directives and supports most
|
|
pseudo instructions. It does very little verification of source correctness.
|
|
|
|
It also supports numbered jump targets and properly supports local and global scope (.globl assembly directive)
|
|
|
|
|
|
The AssemblyFileLoader loads .asm, .S and .s files by default, and acts as a weak fallback to all other filetypes.
|
|
"""
|
|
def parse(self) -> Program:
|
|
with open(self.source_path, 'r') as f:
|
|
return parse_tokens(self.filename, tokenize(f))
|
|
|
|
@classmethod
|
|
def can_parse(cls, source_path: str) -> float:
|
|
"""
|
|
|
|
It also acts as a weak fallback if no other loaders want to take the file.
|
|
|
|
:param source_path: the path to the source file
|
|
:return:
|
|
"""
|
|
# gcc recognizes these line endings as assembly. So we will do too.
|
|
if source_path.split('.')[-1] in ('asm', 'S', 's'):
|
|
return 1
|
|
return 0.01
|
|
|
|
@classmethod
|
|
def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]:
|
|
return argv, {}
|