From d5a4acef67cf7798157bf09938bcad5ee968f472 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Mon, 13 Dec 2021 23:23:55 +0100 Subject: [PATCH 01/30] tokenizer reimplemented --- riscemu/CPU.py | 1 - riscemu/ExecutableParser.py | 10 +- riscemu/Tokenizer.py | 388 ++++++++---------------------------- riscemu/__init__.py | 2 - test/test_helpers.py | 12 ++ 5 files changed, 101 insertions(+), 312 deletions(-) diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 962ef85..3df7d29 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -9,7 +9,6 @@ on them. import sys from typing import Tuple, List, Dict, Callable, Type -from .Tokenizer import RiscVTokenizer from .Executable import MemoryFlags from .Syscall import SyscallInterface, get_syscall_symbols from .Exceptions import RiscemuBaseException, LaunchDebuggerException diff --git a/riscemu/ExecutableParser.py b/riscemu/ExecutableParser.py index c3a9fd7..3e18c3d 100644 --- a/riscemu/ExecutableParser.py +++ b/riscemu/ExecutableParser.py @@ -10,7 +10,7 @@ from .helpers import parse_numeric_argument, int_to_bytes from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags from .Exceptions import * -from .Tokenizer import RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, RiscVPseudoOpToken +from .Tokenizer import tokenize, TokenType, Token, COMMA, NEWLINE from typing import Dict, Tuple, List, Optional @@ -22,7 +22,7 @@ class ExecutableParser: tokenizer: 'RiscVTokenizer' def __init__(self, tokenizer: 'RiscVTokenizer'): - self.instructions: List[RiscVInstructionToken] = list() + self.instructions: List['RiscVInstructionToken'] = list() self.symbols: Dict[str, Tuple[str, int]] = dict() self.sections: Dict[str, MemorySection] = dict() self.tokenizer = tokenizer @@ -37,11 +37,11 @@ class ExecutableParser: :raise ParseException: Raises a ParseException when invalid input is read """ for token in self.tokenizer.tokens: - if isinstance(token, RiscVInstructionToken): + if isinstance(token, 'RiscVInstructionToken'): self.parse_instruction(token) - elif isinstance(token, RiscVSymbolToken): + elif isinstance(token, 'RiscVSymbolToken'): self.handle_symbol(token) - elif isinstance(token, RiscVPseudoOpToken): + elif isinstance(token, 'RiscVPseudoOpToken'): self.handle_pseudo_op(token) return self._get_execuable() diff --git a/riscemu/Tokenizer.py b/riscemu/Tokenizer.py index 68be1ac..db9d330 100644 --- a/riscemu/Tokenizer.py +++ b/riscemu/Tokenizer.py @@ -5,316 +5,96 @@ SPDX-License-Identifier: MIT """ import re -from enum import IntEnum -from typing import List +from dataclasses import dataclass +from enum import Enum, auto +from typing import List, Iterable +from riscemu.decoder import RISCV_REGS from .Exceptions import ParseException -PSEUDO_OPS = [ - '.asciiz', - '.double', - '.extern', - '.global', - '.align', - '.float', - '.kdata', - '.ktext', - '.space', - '.ascii', - '.byte', - '.data', - '.half', - '.text', - '.word', - '.set', -] +LINE_COMMENT_STARTERS = ('#', ';', '//') +WHITESPACE_PATTERN = re.compile(r'\s+') +MEMORY_ADDRESS_PATTERN = re.compile('^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$') +REGISTER_NAMES = RISCV_REGS -COMMENT_START = ["#", ";"] +I = lambda x: x -REG_VALID_SYMBOL_LABEL = re.compile(r'^([A-z_.][A-z_0-9.]*[A-z_0-9]|[A-z_]):') +class TokenType(Enum): + COMMA = auto() + ARGUMENT = auto() + PSEUDO_OP = auto() + INSTRUCTION_NAME = auto() + NEWLINE = auto() + LABEL = auto() -REG_WHITESPACE_UNTIL_NEWLINE = re.compile(r'^(\s*)\n') -REG_WHITESPACE = re.compile(r'^\s*') - -REG_NONWHITESPACE = re.compile(r'^[^\s]*') - -REG_UNTIL_NEWLINE = re.compile(r'^[^\n]*') - -REG_WHITESPACE_NO_LINEBREAK = re.compile(r'^[ \t]*') - -REG_VALID_ARGUMENT = re.compile( - r'^([+-]?(0x[0-9A-f]+|[0-9]+)|[A-z_.][A-z0-9_.]*[A-z_0-9]|[A-z_])(\(([A-z_.][A-z_0-9.]*[A-z_0-9]|[A-z_])\))?' -) - -REG_ARG_SPLIT = re.compile(r'^,[ \t]*') - - -def split_accepting_quotes(string, at=REG_ARG_SPLIT, quotes=('"', "'")): - pos = 0 - last_piece = 0 - pieces = [] - in_quotes = False - if string is None: - return pieces - while pos < len(string): - match = at.match(string[pos:]) - if match is not None: - if not in_quotes: - pieces.append(string[last_piece:pos]) - pos += len(match.group(0)) - last_piece = pos - else: - pos += len(match.group(0)) - elif string[pos] in quotes: - in_quotes = not in_quotes - pos += 1 - elif string[pos] in COMMENT_START and not in_quotes: # entering comment - break - else: - pos += 1 - if in_quotes: - print("[Tokenizer.split] unbalanced quotes in \"{}\"!".format(string)) - pieces.append(string[last_piece:pos]) - return pieces - - -class RiscVInput: - """ - Represents an Assembly file - """ - def __init__(self, content: str, name: str): - self.content = content - self.pos = 0 - self.len = len(content) - self.name = name - - @staticmethod - def from_file(src: str): - with open(src, 'r') as f: - return RiscVInput(f.read(), src) - - def peek(self, offset: int = 0, size: int = 1, regex: re.Pattern = None, text: str = None, regex_group: int = 0): - at = self.pos + offset - - if regex: - if not isinstance(regex, re.Pattern): - print("uncompiled regex passed to peek!") - regex = re.compile(regex) - match = regex.match(self.content[at:]) - if match is None: - return None - - if regex_group != 0 and not match.group(0).startswith(match.group(regex_group)): - print("Cannot peek regex group that does not start at match start!") - return None - return match.group(regex_group) - if text: - if self.content[at:].startswith(text): - return self.content[at:at + len(text)] - return False - return self.content[at:at + size] - - def peek_one_of(self, options: List[str]): - longest_peek = 0 - ret = False - for text in options: - if self.peek(text=text): - if len(text) > longest_peek: - longest_peek = len(text) - ret = text - return ret - - def consume(self, size: int = 1, regex: re.Pattern = None, text: str = None, regex_group: int = 0): - at = self.pos - - if regex: - if not isinstance(regex, re.Pattern): - print("uncompiled regex passed to peek!") - regex = re.compile(regex) - match = regex.match(self.content[at:]) - if match is None: - return None - - if regex_group != 0 and not match.group(0).startswith(match.group(regex_group)): - print("Cannot consume regex group that does not start at match start!") - return None - self.pos += len(match.group(regex_group)) - return match.group(regex_group) - - if text: - if self.content[at:].startswith(text): - self.pos += len(text) - return text - return None - - self.pos += size - return self.content[at:at + size] - - def consume_one_of(self, options: List[str]): - longest_peek = 0 - ret = False - for text in options: - if self.peek(text=text): - if len(text) > longest_peek: - longest_peek = len(text) - ret = text - self.consume(text=ret) - return ret - - def seek_newline(self): - return self.consume(regex=REG_WHITESPACE_UNTIL_NEWLINE, regex_group=1) - - def consume_whitespace(self, linebreak=True): - if linebreak: - return self.consume(regex=REG_WHITESPACE) - return self.consume(regex=REG_WHITESPACE_NO_LINEBREAK) - - def has_next(self): - return self.pos < self.len - - def context(self, size: int = 5): - """ - returns a context string: - | - """ - start = max(self.pos - size, 0) - end = min(self.pos + size, self.len - 1) - - return self.content[start:self.pos] + '|' + self.content[self.pos:end] - - -class TokenType(IntEnum): - SYMBOL = 0 - INSTRUCTION = 1 - PSEUDO_OP = 2 - - def __repr__(self): - return self.name - - def __str__(self): - return self.name - - -class RiscVToken: +@dataclass(frozen=True) +class Token: type: TokenType + value: str - def __init__(self, t_type: TokenType): - self.type = t_type - - def __repr__(self): - return "{}[{}]({})".format(self.__class__.__name__, self.type, self.text()) - - def text(self): - """ - create text representation of instruction - """ - return "unknown" - - -class RiscVInstructionToken(RiscVToken): - def __init__(self, name, args): - super().__init__(TokenType.INSTRUCTION) - self.instruction = name - self.args = args - - def text(self): - if len(self.args) == 0: - return self.instruction - if len(self.args) == 1: - return "{} {}".format(self.instruction, self.args[0]) - if len(self.args) == 2: - return "{} {}, {}".format(self.instruction, *self.args) - return "{} {}, {}, {}".format(self.instruction, *self.args) - - -class RiscVSymbolToken(RiscVToken): - def __init__(self, name): - super().__init__(TokenType.SYMBOL) - self.name = name - - def text(self): - return self.name - - -class RiscVPseudoOpToken(RiscVToken): - def __init__(self, name, args): - super().__init__(TokenType.PSEUDO_OP) - self.name = name - self.args = args - - def text(self): - return "{} {}".format(self.name, self.args) - - -class RiscVTokenizer: - """ - A tokenizer for the RISC-V syntax of a given CPU - """ - def __init__(self, input_assembly: RiscVInput, instructions: List[str]): - self.input = input_assembly - self.tokens: List[RiscVToken] = [] - self.name = input_assembly.name - self.instructions = instructions - - def tokenize(self): - while self.input.has_next(): - # remove leading whitespaces, place cursor at text start - self.input.consume_whitespace() - - # check if we have a pseudo op - if self.input.peek_one_of(PSEUDO_OPS): - self.parse_pseudo_op() - - # check if we have a symbol (like main:) - elif self.input.peek(regex=REG_VALID_SYMBOL_LABEL): - self.parse_symbol() - - # comment - elif self.input.peek() in COMMENT_START: - self.parse_comment() - - # must be instruction - elif self.input.peek_one_of(self.instructions): - self.parse_instruction() - else: - token = self.input.peek(size=5) - raise ParseException("Unknown token around {} at: {}".format(repr(token), repr(self.input.context()))) - self.input.consume_whitespace() - - def parse_pseudo_op(self): - name = self.input.consume_one_of(PSEUDO_OPS) - self.input.consume_whitespace(linebreak=False) - - arg_str = self.input.consume(regex=REG_UNTIL_NEWLINE) - if not arg_str: - args = [] - else: - args = split_accepting_quotes(arg_str) - - self.tokens.append(RiscVPseudoOpToken(name[1:], args)) - - def parse_symbol(self): - name = self.input.consume(regex=REG_VALID_SYMBOL_LABEL) - self.tokens.append(RiscVSymbolToken(name[:-1])) - if not self.input.consume_whitespace(): - print("[Tokenizer] symbol declaration should always be followed by whitespace (at {})!".format( - self.input.context())) - - def parse_instruction(self): - ins = self.input.consume_one_of(self.instructions) - args = [] - self.input.consume_whitespace(linebreak=False) - while self.input.peek(regex=REG_VALID_ARGUMENT) and len(args) < 3: - arg = self.input.consume(regex=REG_VALID_ARGUMENT) - args.append(arg) - if self.input.peek(text=','): - self.input.consume(text=',') - self.input.consume_whitespace(linebreak=False) - else: - break - self.tokens.append(RiscVInstructionToken(ins, args)) - - def parse_comment(self): - # just consume the rest - self.input.consume(regex=REG_UNTIL_NEWLINE) + def __str__(self): + if self.type == TokenType.NEWLINE: + return '\\n' + if self.type == TokenType.COMMA: + return ', ' + return '{}({}) '.format(self.type.name[0:3], self.value) + +NEWLINE = Token(TokenType.NEWLINE, '\n') +COMMA = Token(TokenType.COMMA, ',') + + +def tokenize(input: Iterable[str]) -> Iterable[Token]: + for line in input: + for line_comment_start in LINE_COMMENT_STARTERS: + if line_comment_start in line: + line = line[:line.index(line_comment_start)] + line.strip(' \t\n') + if not line: + continue + + parts = list(part for part in re.split(WHITESPACE_PATTERN, line) if part) + + yield from parse_line(parts) + yield NEWLINE + + +def parse_line(parts: List[str]) -> Iterable[Token]: + if len(parts) == 0: + return () + first_token = parts[0] + + if first_token[0] == '.': + yield Token(TokenType.PSEUDO_OP, first_token) + elif first_token[-1] == ':': + yield Token(TokenType.LABEL, first_token) + else: + yield Token(TokenType.INSTRUCTION_NAME, first_token) + + for part in parts[1:]: + if part == ',': + yield COMMA + continue + yield from parse_arg(part) + + +def parse_arg(arg: str) -> Iterable[Token]: + comma = arg[-1] == ',' + arg = arg[:-1] if comma else arg + mem_match_resul = re.match(MEMORY_ADDRESS_PATTERN, arg) + if mem_match_resul: + register = mem_match_resul.group(2).lower() + if register not in RISCV_REGS: + raise ParseException(f'"{register}" is not a valid register!') + yield Token(TokenType.ARGUMENT, register) + yield Token(TokenType.ARGUMENT, mem_match_resul.group(1)) + else: + yield Token(TokenType.ARGUMENT, arg) + if comma: + yield COMMA + + +def print_tokens(tokens: Iterable[Token]): + for token in tokens: + print(token, end='\n' if token == NEWLINE else '') + print("", flush=True, end="") diff --git a/riscemu/__init__.py b/riscemu/__init__.py index e006fb3..fc080cf 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -11,8 +11,6 @@ It contains everything needed to run assembly files, so you don't need any custo from .Exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException -from .Tokenizer import RiscVInput, RiscVTokenizer - from .Executable import Executable, LoadedExecutable, LoadedMemorySection from .ExecutableParser import ExecutableParser diff --git a/test/test_helpers.py b/test/test_helpers.py index 1166e50..bc8ef0d 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -28,3 +28,15 @@ class Test(TestCase): self.assertEqual(to_signed(0xffed36e4), -1231132) self.assertEqual(to_signed(0x0FFFFFFF), 0x0FFFFFFF) + def test_bind_twos_complement(self): + minval = -(1 << 31) + maxval = ((1 << 31)-1) + + self.assertEqual(bind_twos_complement(minval), minval, "minval preserves") + self.assertEqual(bind_twos_complement(minval), minval, ) + self.assertEqual(bind_twos_complement(maxval), maxval, "maxval preserves") + self.assertEqual(bind_twos_complement(minval - 1), maxval, "minval-1 wraps") + self.assertEqual(bind_twos_complement(maxval + 1), minval, "maxval+1 wraps") + self.assertEqual(bind_twos_complement(0), 0, "0 is 0") + self.assertEqual(bind_twos_complement(1), 1, "1 is 1") + self.assertEqual(bind_twos_complement(-1), -1, "-1 is -1") \ No newline at end of file From 84562de98fa41e8d43f3cec1ed797e971a808d3c Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Tue, 14 Dec 2021 07:33:17 +0100 Subject: [PATCH 02/30] added tests for tokenizer --- test/test_tokenizer.py | 81 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 test/test_tokenizer.py diff --git a/test/test_tokenizer.py b/test/test_tokenizer.py new file mode 100644 index 0000000..659abf1 --- /dev/null +++ b/test/test_tokenizer.py @@ -0,0 +1,81 @@ +from unittest import TestCase + +from riscemu.Tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA + + +def ins(name: str) -> Token: + return Token(TokenType.INSTRUCTION_NAME, name) + + +def arg(name: str) -> Token: + return Token(TokenType.ARGUMENT, name) + + +def op(name: str) -> Token: + return Token(TokenType.PSEUDO_OP, name) + + +def lbl(name: str) -> Token: + return Token(TokenType.LABEL, name) + + +class Test(TestCase): + + def test_instructions(self): + program = [ + 'li a0, 144', + 'divi a0, a0, 12', + 'xori a1, a0, 12' + ] + tokens = [ + ins('li'), arg('a0'), COMMA, arg('144'), NEWLINE, + ins('divi'), arg('a0'), COMMA, arg('a0'), COMMA, arg('12'), NEWLINE, + ins('xori'), arg('a1'), COMMA, arg('a0'), COMMA, arg('12'), NEWLINE, + ] + self.assertEqual(list(tokenize(program)), tokens) + + def test_comments(self): + parsed_res = [ + ins('li'), arg('a0'), COMMA, arg('144'), NEWLINE + ] + for c in ('#', '//', ';'): + lines = [ + c + ' this is a comment', + 'li a0, 144' + ] + self.assertEqual(list(tokenize(lines)), parsed_res) + + def test_pseudo_ins(self): + parsed_res = [ + Token(TokenType.PSEUDO_OP, '.section'), Token(TokenType.ARGUMENT, '.text'), NEWLINE, + Token(TokenType.PSEUDO_OP, '.type'), Token(TokenType.ARGUMENT, 'init'), COMMA, + Token(TokenType.ARGUMENT, '@function'), NEWLINE + ] + input_program = [ + '.section .text', + '.type init, @function' + ] + self.assertEqual(list(tokenize(input_program)), parsed_res) + + def test_full_program(self): + program = """ +# a hashtag comment + +; semicolon comment followed by an empty line +.section .text +// double slash comment + addi sp, sp, -32 + sw s0, 0(ra) +section: + sub s0, s0, s0 +""" + tokens = [ + op('.section'), arg('.text'), NEWLINE, + ins('addi'), arg('sp'), COMMA, arg('sp'), COMMA, arg('-32'), NEWLINE, + ins('sw'), arg('s0'), COMMA, arg('ra'), arg('0'), NEWLINE, + lbl('section:'), NEWLINE, + ins('sub'), arg('s0'), COMMA, arg('s0'), COMMA, arg('s0'), NEWLINE + ] + + self.assertEqual(list(tokenize(program.splitlines())), tokens) + From dc4dca6fea930f1a8820d6d299adb4f9201e4539 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sat, 18 Dec 2021 00:25:39 +0100 Subject: [PATCH 03/30] [wip] almost done with the rework of the parser and internal data structure representation of programs --- riscemu/CPU.py | 14 +- riscemu/Executable.py | 319 ----------------------- riscemu/ExecutableParser.py | 193 -------------- riscemu/MMU.py | 115 ++------ riscemu/__init__.py | 12 +- riscemu/assembler.py | 169 ++++++++++++ riscemu/base_types.py | 186 +++++++++++++ riscemu/{Config.py => config.py} | 4 +- riscemu/debug.py | 6 +- riscemu/{Exceptions.py => exceptions.py} | 6 +- riscemu/helpers.py | 36 ++- riscemu/instructions/InstructionSet.py | 2 +- riscemu/instructions/RV32A.py | 2 +- riscemu/instructions/RV32I.py | 6 +- riscemu/instructions/RV32M.py | 2 +- riscemu/parser.py | 79 ++++++ riscemu/priv/ElfLoader.py | 4 +- riscemu/priv/ImageLoader.py | 6 +- riscemu/priv/PrivCPU.py | 4 +- riscemu/priv/PrivRV32I.py | 2 +- riscemu/{Registers.py => registers.py} | 4 +- riscemu/{Syscall.py => syscall.py} | 0 riscemu/{Tokenizer.py => tokenizer.py} | 6 +- riscemu/types.py | 72 +++++ test/test_tokenizer.py | 2 +- 25 files changed, 604 insertions(+), 647 deletions(-) delete mode 100644 riscemu/Executable.py delete mode 100644 riscemu/ExecutableParser.py create mode 100644 riscemu/assembler.py create mode 100644 riscemu/base_types.py rename riscemu/{Config.py => config.py} (85%) rename riscemu/{Exceptions.py => exceptions.py} (97%) create mode 100644 riscemu/parser.py rename riscemu/{Registers.py => registers.py} (98%) rename riscemu/{Syscall.py => syscall.py} (100%) rename riscemu/{Tokenizer.py => tokenizer.py} (94%) create mode 100644 riscemu/types.py diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 3df7d29..46a10ce 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -9,12 +9,12 @@ on them. import sys from typing import Tuple, List, Dict, Callable, Type -from .Executable import MemoryFlags -from .Syscall import SyscallInterface, get_syscall_symbols -from .Exceptions import RiscemuBaseException, LaunchDebuggerException +from .base_types import MemoryFlags +from .syscall import SyscallInterface, get_syscall_symbols +from .exceptions import RiscemuBaseException, LaunchDebuggerException from .MMU import MMU -from .Config import RunConfig -from .Registers import Registers +from .config import RunConfig +from .registers import Registers from .debug import launch_debug_session from .colors import FMT_CPU, FMT_NONE, FMT_ERROR @@ -23,7 +23,7 @@ import riscemu import typing if typing.TYPE_CHECKING: - from . import Executable, LoadedExecutable, LoadedInstruction + from . import base_types, LoadedExecutable, LoadedInstruction from .instructions.InstructionSet import InstructionSet @@ -78,7 +78,7 @@ class CPU: """ return RiscVTokenizer(tokenizer_input, self.all_instructions()) - def load(self, e: riscemu.Executable): + def load(self, e: riscemu.base_types): """ Load an executable into Memory """ diff --git a/riscemu/Executable.py b/riscemu/Executable.py deleted file mode 100644 index ed48eb5..0000000 --- a/riscemu/Executable.py +++ /dev/null @@ -1,319 +0,0 @@ -""" -RiscEmu (c) 2021 Anton Lydike - -SPDX-License-Identifier: MIT - -This file holds Executable and LoadedExecutable classes as well as loading and some linking code. - -FIXME: refactor this code into muliple files -""" - -from dataclasses import dataclass, field -from typing import Dict, List, Tuple, Union, Optional -from .Exceptions import * -from .helpers import * -from math import log - -import typing - -if typing.TYPE_CHECKING: - from .Tokenizer import RiscVInstructionToken - - -@dataclass(frozen=True) -class MemoryFlags: - read_only: bool - executable: bool - - def __repr__(self): - return "{}({},{})".format( - self.__class__.__name__, - 'ro' if self.read_only else 'rw', - 'x' if self.executable else '-' - ) - - -@dataclass -class MemorySection: - name: str - flags: MemoryFlags - size: int = 0 - content: List[bytearray] = field(default_factory=list) - - def add(self, data: bytearray): - self.content.append(data) - self.size += len(data) - - def continuous_content(self, parent: 'LoadedExecutable'): - """ - converts the content into one continuous bytearray - """ - if self.size == 0: - return bytearray(0) - content = self.content[0] - for b in self.content[1:]: - content += b - return content - - -@dataclass -class InstructionMemorySection(MemorySection): - content: List['RiscVInstructionToken'] = field(default_factory=list) - - def add_insn(self, insn: 'RiscVInstructionToken'): - self.content.append(insn) - self.size += 1 - - def continuous_content(self, parent: 'LoadedExecutable'): - return [ - LoadedInstruction(ins.instruction, ins.args, parent) - for ins in self.content - ] - - -@dataclass() -class Executable: - run_ptr: Tuple[str, int] - sections: Dict[str, MemorySection] - symbols: Dict[str, Tuple[str, int]] - exported_symbols: List[str] - name: str - - def __repr__(self): - return "{}(sections = {}, symbols = {}, run_ptr = {}, globals={})".format( - self.__class__.__name__, - " ".join(self.sections.keys()), - " ".join(self.symbols.keys()), - self.run_ptr, - ",".join(self.exported_symbols) - ) - - -### LOADING CODE - - -@dataclass(frozen=True) -class LoadedInstruction: - """ - An instruction which is loaded into memory. It knows the binary it belongs to to resolve symbols - """ - name: str - args: List[str] - bin: 'LoadedExecutable' - - def get_imm(self, num: int): - """ - parse and get immediate argument - """ - if len(self.args) <= num: - raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args)) - arg = self.args[num] - # look up symbols - if self.bin.has_symb(arg): - return self.bin.lookup_symbol(arg) - return parse_numeric_argument(arg) - - def get_imm_reg(self, num: int): - """ - parse and get an argument imm(reg) - """ - if len(self.args) <= num: - raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args)) - arg = self.args[num] - ASSERT_IN("(", arg) - imm, reg = arg[:-1].split("(") - if self.bin.has_symb(imm): - return self.bin.lookup_symbol(imm), reg - return parse_numeric_argument(imm), reg - - def get_reg(self, num: int): - """ - parse and get an register argument - """ - if len(self.args) <= num: - raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args)) - return self.args[num] - - def __repr__(self): - return "{} {}".format(self.name, ", ".join(self.args)) - - -@dataclass(frozen=True) -class LoadedMemorySection: - """ - A section which is loaded into memory - """ - name: str - base: int - size: int - content: Union[List[LoadedInstruction], bytearray] = field(repr=False) - flags: MemoryFlags - owner: str - - def read(self, offset: int, size: int): - if offset < 0: - raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, size, 'read') - if offset + size > self.size: - raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset, - size, 'read') - return self.content[offset: offset + size] - - def read_instruction(self, offset): - if not self.flags.executable: - raise MemoryAccessException('Section not executable!', self.base + offset, 1, 'read exec') - - if offset < 0: - raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'read exec') - if offset >= self.size: - raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset, - 1, 'read exec') - return self.content[offset] - - def write(self, offset, size, data): - if self.flags.read_only: - raise MemoryAccessException('Section not writeable {}'.format(self.name), self.base + offset, size, 'write') - - if offset < 0: - raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'write') - if offset >= self.size: - raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset, - size, 'write') - - for i in range(size): - self.content[offset + i] = data[i] - - def dump(self, at_addr=None, fmt='hex', max_rows=10, group=4, bytes_per_row=16, all=False): - highlight = -1 - if at_addr is None: - at_addr = self.base - else: - highlight = at_addr - self.base - - at_off = at_addr - self.base - start = max(align_addr(at_off - ((max_rows * bytes_per_row) // 2), 8) - 8, 0) - if all: - end = self.size - start = 0 - else: - end = min(start + (max_rows * bytes_per_row), self.size) - - fmt_str = " 0x{:0" + str(ceil(log(self.base + end, 16))) + "X}: {}" - - if self.flags.executable: - # this section holds instructions! - start = 0 if all else max(at_off - (max_rows // 2), 0) - end = self.size if all else min(self.size, start + max_rows) - print(FMT_MEM + "{}, viewing {} instructions:".format( - self, end - start - ) + FMT_NONE) - for i in range(start, end): - if i == highlight: - ins = FMT_UNDERLINE + FMT_ORANGE + repr(self.content[i]) + FMT_NONE - else: - ins = repr(self.content[i]) - print(fmt_str.format(self.base + i, ins)) - else: - print(FMT_MEM + "{}, viewing {} bytes:".format( - self, end - start - ) + FMT_NONE) - for i in range(0, end - start, bytes_per_row): - data = self.content[start + i: min(start + i + bytes_per_row, end)] - if start + i <= highlight <= start + i + bytes_per_row: - # do hightlight here! - hi_ind = (highlight - start - i) // group - print(fmt_str.format(self.base + start + i, format_bytes(data, fmt, group, highlight=hi_ind))) - else: - print(fmt_str.format(self.base + start + i, format_bytes(data, fmt, group))) - if end == self.size: - print(FMT_MEM + "End of section!" + FMT_NONE) - else: - print(FMT_MEM + "More bytes ..." + FMT_NONE) - - def __repr__(self): - return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format( - self.__class__.__name__, - self.name, - self.base, - self.size, - self.flags, - self.owner - ) - - -class LoadedExecutable: - """ - This represents an executable which is loaded into memory at address base_addr - - This is basicalle the "loader" in normal system environments - It initializes the stack and heap - - It still holds a symbol table, that is not accessible memory since I don't want to deal with - binary strings in memory etc. - """ - name: str - base_addr: int - sections_by_name: Dict[str, LoadedMemorySection] - sections: List[LoadedMemorySection] - symbols: Dict[str, int] - run_ptr: int - exported_symbols: Dict[str, int] - global_symbol_table: Dict[str, int] - - def __init__(self, exe: Executable, base_addr: int, global_symbol_table: Dict[str, int]): - self.name = exe.name - self.base_addr = base_addr - self.sections = list() - self.sections_by_name = dict() - self.symbols = dict() - self.exported_symbols = dict() - self.global_symbol_table = global_symbol_table - - curr = base_addr - for sec in exe.sections.values(): - loaded_sec = LoadedMemorySection( - sec.name, - curr, - sec.size, - sec.continuous_content(self), - sec.flags, - self.name - ) - self.sections.append(loaded_sec) - self.sections_by_name[loaded_sec.name] = loaded_sec - curr = align_addr(loaded_sec.size + curr) - - for name, (sec_name, offset) in exe.symbols.items(): - if sec_name == '_static_': - self.symbols[name] = offset - else: - ASSERT_IN(sec_name, self.sections_by_name) - self.symbols[name] = self.sections_by_name[sec_name].base + offset - - for name in exe.exported_symbols: - self.exported_symbols[name] = self.symbols[name] - - self.size = curr - base_addr - - # translate run_ptr from executable - run_ptr_sec, run_ptr_off = exe.run_ptr - self.run_ptr = self.sections_by_name[run_ptr_sec].base + run_ptr_off - - def lookup_symbol(self, name): - if name in self.symbols: - return self.symbols[name] - if name in self.global_symbol_table: - return self.global_symbol_table[name] - raise LinkerException('Symbol {} not found!'.format(name), (self,)) - - def __repr__(self): - return '{}[{}](base=0x{:08X}, size={}bytes, sections={}, run_ptr=0x{:08X})'.format( - self.__class__.__name__, - self.name, - self.base_addr, - self.size, - " ".join(self.sections_by_name.keys()), - self.run_ptr - ) - - def has_symb(self, arg): - return arg in self.symbols or arg in self.global_symbol_table diff --git a/riscemu/ExecutableParser.py b/riscemu/ExecutableParser.py deleted file mode 100644 index 3e18c3d..0000000 --- a/riscemu/ExecutableParser.py +++ /dev/null @@ -1,193 +0,0 @@ -""" -RiscEmu (c) 2021 Anton Lydike - -SPDX-License-Identifier: MIT - -This file holds the parser that parses the tokenizer output. -""" - -from .helpers import parse_numeric_argument, int_to_bytes -from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags -from .Exceptions import * - -from .Tokenizer import tokenize, TokenType, Token, COMMA, NEWLINE - -from typing import Dict, Tuple, List, Optional - - -class ExecutableParser: - """ - Parses output form the RiscVTokenizer - """ - tokenizer: 'RiscVTokenizer' - - def __init__(self, tokenizer: 'RiscVTokenizer'): - self.instructions: List['RiscVInstructionToken'] = list() - self.symbols: Dict[str, Tuple[str, int]] = dict() - self.sections: Dict[str, MemorySection] = dict() - self.tokenizer = tokenizer - self.active_section: Optional[str] = None - self.implicit_sections = False - self.globals: List[str] = list() - - def parse(self) -> Executable: - """ - parse tokenizer output into an executable - :return: the parsed executable - :raise ParseException: Raises a ParseException when invalid input is read - """ - for token in self.tokenizer.tokens: - if isinstance(token, 'RiscVInstructionToken'): - self.parse_instruction(token) - elif isinstance(token, 'RiscVSymbolToken'): - self.handle_symbol(token) - elif isinstance(token, 'RiscVPseudoOpToken'): - self.handle_pseudo_op(token) - return self._get_execuable() - - def _get_execuable(self) -> Executable: - start_ptr = ('text', 0) - if '_start' in self.symbols: - start_ptr = self.symbols['_start'] - elif 'main' in self.symbols: - start_ptr = self.symbols['main'] - return Executable(start_ptr, self.sections, self.symbols, self.globals, self.tokenizer.name) - - def parse_instruction(self, ins: 'RiscVInstructionToken') -> None: - """ - parses an Instruction token - :param ins: the instruction token - """ - if self.active_section is None: - self.op_text() - self.implicit_sections = True - - ASSERT_EQ(self.active_section, 'text') - sec = self._curr_sec() - if isinstance(sec, InstructionMemorySection): - sec.add_insn(ins) - else: - raise ParseException("SHOULD NOT BE REACHED") - - def handle_symbol(self, token: 'RiscVSymbolToken'): - """ - Handle a symbol token (such as 'main:') - :param token: the symbol token - """ - ASSERT_NOT_IN(token.name, self.symbols) - ASSERT_NOT_NULL(self.active_section) - sec_pos = self._curr_sec().size - self.symbols[token.name] = (self.active_section, sec_pos) - - def handle_pseudo_op(self, op: 'RiscVPseudoOpToken'): - """ - Handle a pseudo op token (such as '.word 0xffaabbcc') - :param op: the peseudo-op token - """ - name = 'op_' + op.name - if hasattr(self, name): - getattr(self, name)(op) - else: - raise ParseException("Unknown pseudo op: {}".format(op), (op,)) - - ## Pseudo op implementations: - def op_section(self, op: 'RiscVPseudoOpToken'): - """ - handles a .section token - :param op: The token - """ - ASSERT_LEN(op.args, 1) - name = op.args[0][1:] - ASSERT_IN(name, ('data', 'rodata', 'text')) - getattr(self, 'op_' + name)(op) - - def op_text(self, op: 'RiscVPseudoOpToken' = None): - """ - handles a .text token - :param op: The token - """ - self._set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection) - - def op_data(self, op: 'RiscVPseudoOpToken' = None): - """ - handles a .data token - :param op: The token - """ - self._set_sec('data', MemoryFlags(read_only=False, executable=False)) - - def op_rodata(self, op: 'RiscVPseudoOpToken' = None): - """ - handles a .rodata token - :param op: The token - """ - self._set_sec('rodata', MemoryFlags(read_only=True, executable=False)) - - def op_space(self, op: 'RiscVPseudoOpToken'): - """ - handles a .space token. Inserts empty space into the current (data or rodata) section - :param op: The token - """ - ASSERT_IN(self.active_section, ('data', 'rodata')) - ASSERT_LEN(op.args, 1) - size = parse_numeric_argument(op.args[0]) - self._curr_sec().add(bytearray(size)) - - def op_ascii(self, op: 'RiscVPseudoOpToken'): - """ - handles a .ascii token. Inserts ascii encoded text into the currrent data section - :param op: The token - """ - ASSERT_IN(self.active_section, ('data', 'rodata')) - ASSERT_LEN(op.args, 1) - str = op.args[0][1:-1].encode('ascii').decode('unicode_escape') - self._curr_sec().add(bytearray(str, 'ascii')) - - def op_asciiz(self, op: 'RiscVPseudoOpToken'): - """ - handles a .ascii token. Inserts nullterminated ascii encoded text into the currrent data section - :param op: The token - """ - ASSERT_IN(self.active_section, ('data', 'rodata')) - ASSERT_LEN(op.args, 1) - str = op.args[0][1:-1].encode('ascii').decode('unicode_escape') - self._curr_sec().add(bytearray(str + '\0', 'ascii')) - - def op_global(self, op: 'RiscVPseudoOpToken'): - """ - handles a .global token. Marks the token as global - :param op: The token - """ - ASSERT_LEN(op.args, 1) - name = op.args[0] - self.globals.append(name) - - def op_set(self, op: 'RiscVPseudoOpToken'): - """ - handles a .set name, val token. Sets the symbol name to val - :param op: The token - """ - ASSERT_LEN(op.args, 2) - name = op.args[0] - val = parse_numeric_argument(op.args[1]) - self.symbols[name] = ('_static_', val) - - def op_align(self, op: 'RiscVPseudoOpToken'): - """ - handles an align token. Currently a nop (just not implemented fully yet, as linker handles most alignement tasks) - :param op: The token - """ - pass - - def op_word(self, op: 'RiscVPseudoOpToken'): - ASSERT_LEN(op.args, 1) - val = parse_numeric_argument(op.args[0]) - self._curr_sec().add(int_to_bytes(val, 4)) - - ## Section handler code - def _set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection): - if name not in self.sections: - self.sections[name] = cls(name, flags) - self.active_section = name - - def _curr_sec(self): - return self.sections[self.active_section] diff --git a/riscemu/MMU.py b/riscemu/MMU.py index f7ca534..c255eb4 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -4,10 +4,10 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -from .Config import RunConfig -from .Executable import Executable, LoadedExecutable, LoadedMemorySection, LoadedInstruction, MemoryFlags +from .base_types import InstructionContext, Instruction, MemorySection, MemoryFlags, T_RelativeAddress, T_AbsoluteAddress, \ + Program from .helpers import align_addr, int_from_bytes -from .Exceptions import OutOfMemoryException, InvalidAllocationException +from .exceptions import OutOfMemoryException, InvalidAllocationException from .colors import * from typing import Dict, List, Tuple, Optional @@ -27,19 +27,14 @@ class MMU: No single allocation can be bigger than 64 MB """ - sections: List[LoadedMemorySection] + sections: List[MemorySection] """ A list of all loaded memory sections """ - binaries: List[LoadedExecutable] + programs: List[Program] """ - A list of all loaded executables - """ - - last_bin: Optional[LoadedExecutable] = None - """ - The last loaded executable (the next executable is inserted directly after this one) + A list of all loaded programs """ global_symbols: Dict[str, int] @@ -47,79 +42,14 @@ class MMU: The global symbol table """ - last_ins_sec: Optional[LoadedMemorySection] - - def __init__(self, conf: RunConfig): - """ - Create a new MMU, respecting the active RunConfiguration - - :param conf: The config to respect - """ - self.sections: List[LoadedMemorySection] = list() - self.binaries: List[LoadedExecutable] = list() - self.first_free_addr: int = 0x100 - self.conf: RunConfig = conf - self.global_symbols: Dict[str, int] = dict() - self.last_ins_sec = None - - def load_bin(self, exe: Executable) -> LoadedExecutable: - """ - Load an executable into memory - - :param exe: the executable to load - :return: A LoadedExecutable - :raises OutOfMemoryException: When all memory is used - """ - - # align to 8 byte word - addr = align_addr(self.first_free_addr) - - loaded_bin = LoadedExecutable(exe, addr, self.global_symbols) - - if loaded_bin.size + addr > self.max_size: - raise OutOfMemoryException('load of executable') - - self.binaries.append(loaded_bin) - self.first_free_addr = loaded_bin.base_addr + loaded_bin.size - - # read sections into sec dict - for sec in loaded_bin.sections: - self.sections.append(sec) - - self.global_symbols.update(loaded_bin.exported_symbols) - - print(FMT_MEM + "[MMU] Successfully loaded{}: {}".format(FMT_NONE, loaded_bin)) - - return loaded_bin - - def allocate_section(self, name: str, req_size: int, flag: MemoryFlags): + def __init__(self): """ - Used to allocate a memory region (data only). Use `load_bin` if you want to load a binary, this is used for - stack and maybe malloc in the future. - - :param name: Name of the section to allocate - :param req_size: The requested size - :param flag: The flags protecting this memory section - :return: The LoadedMemorySection + Create a new MMU """ - if flag.executable: - raise InvalidAllocationException('cannot allocate executable section', name, req_size, flag) - - if req_size < 0: - raise InvalidAllocationException('Invalid size request', name, req_size, flag) + self.sections = list() + self.global_symbols = dict() - if req_size > self.max_alloc_size: - raise InvalidAllocationException('Cannot allocate more than {} bytes at a time'.format(self.max_alloc_size), - name, req_size, flag) - - base = align_addr(self.first_free_addr) - size = align_addr(req_size) - sec = LoadedMemorySection(name, base, size, bytearray(size), flag, "") - self.sections.append(sec) - self.first_free_addr = base + size - return sec - - def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]: + def get_sec_containing(self, addr: T_AbsoluteAddress) -> Optional[MemorySection]: """ Returns the section that contains the address addr @@ -131,29 +61,25 @@ class MMU: return sec return None - def get_bin_containing(self, addr: int) -> Optional[LoadedExecutable]: + def get_bin_containing(self, addr: T_AbsoluteAddress) -> Optional[Program]: for exe in self.binaries: if exe.base_addr <= addr < exe.base_addr + exe.size: return exe return None - def read_ins(self, addr: int) -> LoadedInstruction: + def read_ins(self, addr: T_AbsoluteAddress) -> Instruction: """ Read a single instruction located at addr :param addr: The location :return: The Instruction """ - sec = self.last_ins_sec - if sec is not None and sec.base <= addr < sec.base + sec.size: - return sec.read_instruction(addr - sec.base) sec = self.get_sec_containing(addr) - self.last_ins_sec = sec if sec is None: print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! " "Have you forgotten an exit syscall or ret statement?" + FMT_NONE) raise RuntimeError("No next instruction available!") - return sec.read_instruction(addr - sec.base) + return sec.read_ins(addr - sec.base) def read(self, addr: int, size: int) -> bytearray: """ @@ -164,6 +90,9 @@ class MMU: :return: The bytearray at addr """ sec = self.get_sec_containing(addr) + if sec is None: + print(FMT_MEM + "[MMU] Trying to read data form invalid region at 0x{:x}! ".format(addr) + FMT_NONE) + raise RuntimeError("Reading from uninitialized memory region!") return sec.read(addr - sec.base, size) def write(self, addr: int, size: int, data): @@ -176,7 +105,7 @@ class MMU: """ sec = self.get_sec_containing(addr) if sec is None: - print(FMT_MEM + '[MMU] Invalid write into non-initialized section at 0x{:08X}'.format(addr) + FMT_NONE) + print(FMT_MEM + '[MMU] Invalid write into non-initialized region at 0x{:08X}'.format(addr) + FMT_NONE) raise RuntimeError("No write pls") return sec.write(addr - sec.base, size, data) @@ -195,7 +124,7 @@ class MMU: return sec.dump(addr, *args, **kwargs) - def symbol(self, symb: str): + def label(self, symb: str): """ Look up the symbol symb in all local symbol tables (and the global one) @@ -204,9 +133,9 @@ class MMU: print(FMT_MEM + "[MMU] Lookup for symbol {}:".format(symb) + FMT_NONE) if symb in self.global_symbols: print(" Found global symbol {}: 0x{:X}".format(symb, self.global_symbols[symb])) - for b in self.binaries: - if symb in b.symbols: - print(" Found local symbol {}: 0x{:X} in {}".format(symb, b.symbols[symb], b.name)) + for section in self.sections: + if symb in section.context.labels: + print(" Found local labels {}: 0x{:X} in {}".format(symb, section.context.labels[symb], section.name)) def read_int(self, addr: int) -> int: return int_from_bytes(self.read(addr, 4)) diff --git a/riscemu/__init__.py b/riscemu/__init__.py index fc080cf..d580e2b 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -8,21 +8,19 @@ This package aims at providing an all-round usable RISC-V emulator and debugger It contains everything needed to run assembly files, so you don't need any custom compilers or toolchains """ -from .Exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ +from .exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException -from .Executable import Executable, LoadedExecutable, LoadedMemorySection - -from .ExecutableParser import ExecutableParser +from .base_types import Executable, LoadedExecutable, LoadedMemorySection from .instructions import * from .MMU import MMU -from .Registers import Registers -from .Syscall import SyscallInterface, Syscall +from .registers import Registers +from .syscall import SyscallInterface, Syscall from .CPU import CPU -from .Config import RunConfig +from .config import RunConfig __author__ = "Anton Lydike " __copyright__ = "Copyright 2021 Anton Lydike" diff --git a/riscemu/assembler.py b/riscemu/assembler.py new file mode 100644 index 0000000..c8c7546 --- /dev/null +++ b/riscemu/assembler.py @@ -0,0 +1,169 @@ +from typing import Optional, Tuple, Union +from enum import Enum, auto +from typing import Optional, Tuple, Union + +from helpers import parse_numeric_argument +from .base_types import Program, T_RelativeAddress, InstructionContext +from .colors import FMT_PARSE, FMT_NONE +from .exceptions import ParseException +from .helpers import ASSERT_LEN +from .tokenizer import Token +from .types import BinaryDataMemorySection, InstructionMemorySection + + +INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini') + + +class MemorySectionType(Enum): + Data = auto() + Instructions = auto() + + +class CurrentSection: + name: str + data: Union[list, bytearray] + type: MemorySectionType + + def current_address(self) -> T_RelativeAddress: + if self.type == MemorySectionType.Data: + return len(self.data) + return len(self.data) * 4 + + def __repr__(self): + return "{}(name={},data={},type={})".format( + self.__class__.__name__, self.name, + self.data, self.type.name + ) + + +class ParseContext: + section: Optional[CurrentSection] + context: InstructionContext + program: Program + + def __init__(self, name: str): + self.program = Program(name) + self.context = self.program.context + self.section = None + + def finalize(self) -> Program: + self.finalize_section() + return self.program + + def finalize_section(self): + if self.section is None: + return + if self.section.type == MemorySectionType.Data: + section = BinaryDataMemorySection(self.section.data, self.section.name, self.context) + self.program.add_section(section) + elif self.section.type == MemorySectionType.Instructions: + section = InstructionMemorySection(self.section.data, self.section.name, self.context) + self.program.add_section(section) + + def __repr__(self): + return "{}(\n\tsetion={},\n\tprogram={}\n)".format( + self.__class__.__name__, self.section, self.program + ) + + +def ASSERT_IN_SECTION_TYPE(context: ParseContext, type: MemorySectionType): + if context.section is None: + raise ParseException('Error, expected to be in {} section, but no section is present...'.format(type.name)) + if context.section.type != type: + raise ParseException( + 'Error, expected to be in {} section, but currently in {}...'.format(type.name, context.section) + ) + + +def get_section_base_name(section_name: str) -> str: + return '.' + section_name.split('.')[1] + + +class AssemblerDirectives: + """ + This class represents a collection of all assembler directives as documented by + https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md#pseudo-ops + + All class methods prefixed with op_ are directly used as assembler directives. + """ + + @classmethod + def op_align(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 1) + ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data) + align_to = parse_numeric_argument(args[0]) + current_mod = context.section.current_address() % align_to + if current_mod == 0: + return + context.section.data += bytearray(align_to - current_mod) + + @classmethod + def op_section(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 1) + context.finalize_section() + + if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES: + context.section.type = MemorySectionType.Instructions + context.section.data = list() + else: + context.section.type = MemorySectionType.Data + context.section.data = bytearray() + context.section.name = args[0] + + @classmethod + def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 1) + context.program.global_labels.add(args[0]) + + @classmethod + def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 2) + name = args[0] + value = parse_numeric_argument(args[1]) + context.context.labels[name] = value + + @classmethod + def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 1) + ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data) + size = parse_numeric_argument(args[0]) + cls.add_bytes(size, bytearray(size), context) + + @classmethod + def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext): + ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data) + + if content is None: + content = bytearray(size) + + @classmethod + def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True): + encoded_bytes = bytearray(text.encode('ascii')) + if zero_terminate: + encoded_bytes += bytearray(1) + cls.add_bytes(len(encoded_bytes), encoded_bytes, context) + + @classmethod + def handle_instruction(cls, token: Token, args: Tuple[str], context: ParseContext): + op = token.value[1:] + if hasattr(cls, 'op_' + op): + getattr(cls, 'op_' + op)(token, args, context) + elif op in ('text', 'data', 'rodata', 'bss', 'sbss'): + cls.op_section(token, (token.value,), context) + elif op in ('string', 'asciiz', 'asciz', 'ascii'): + ASSERT_LEN(args, 1) + cls.add_text(args[0], context, op == 'ascii') + elif op in DATA_OP_SIZES: + size = DATA_OP_SIZES[op] + for arg in args: + cls.add_bytes(size, parse_numeric_argument(arg), context) + else: + print(FMT_PARSE + "Unknown assembler directive: {} {} in {}".format(token, args, context) + FMT_NONE) + + +DATA_OP_SIZES = { + 'byte': 1, + '2byte': 2, 'half': 2, 'short': 2, + '4byte': 4, 'word': 4, 'long': 4, + '8byte': 8, 'dword': 8, 'quad': 8, +} diff --git a/riscemu/base_types.py b/riscemu/base_types.py new file mode 100644 index 0000000..0bf92b7 --- /dev/null +++ b/riscemu/base_types.py @@ -0,0 +1,186 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: MIT + +This file contains base classes which represent loaded programs +""" + +import re +from abc import ABC +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple, Set +from collections import defaultdict + +from .helpers import * + +T_RelativeAddress = int +T_AbsoluteAddress = int + +NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$') + + +@dataclass(frozen=True) +class MemoryFlags: + read_only: bool + executable: bool + + def __repr__(self): + return "{}({},{})".format( + self.__class__.__name__, + 'ro' if self.read_only else 'rw', + 'x' if self.executable else '-' + ) + + +class InstructionContext: + base_address: T_AbsoluteAddress + """ + The address where the instruction block is placed + """ + + labels: Dict[str, T_RelativeAddress] + """ + This dictionary maps all labels to their relative position of the instruction block + """ + numbered_labels: Dict[str, List[T_RelativeAddress]] + """ + This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where + the label was placed + """ + + def __init__(self): + self.labels = dict() + self.numbered_labels = defaultdict(list) + self.base_address = 0 + + def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_RelativeAddress]: + if NUMBER_SYMBOL_PATTERN.match(symbol): + if address_at is None: + raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol)) + + direction = symbol[-1] + if direction == 'b': + return max([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr < address_at], + default=None) + else: + return min([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr > address_at], + default=None) + else: + return self.labels.get(symbol, None) + + +class Instruction(ABC): + name: str + args: tuple + + @abstractmethod + def get_imm(self, num: int) -> int: + """ + parse and get immediate argument + """ + pass + + @abstractmethod + def get_imm_reg(self, num: int) -> Tuple[int, str]: + """ + parse and get an argument imm(reg) + """ + pass + + @abstractmethod + def get_reg(self, num: int) -> str: + """ + parse and get an register argument + """ + pass + + def __repr__(self): + return "{} {}".format(self.name, ", ".join(self.args)) + + +@dataclass +class MemorySection(ABC): + name: str + flags: MemoryFlags + size: int + base: T_AbsoluteAddress + owner: str + context: InstructionContext + + @abstractmethod + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + pass + + @abstractmethod + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + pass + + @abstractmethod + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + pass + + def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress], fmt: str = 'hex', + bytes_per_row: int = 16, rows: int = 10, group: int = 4): + if self.flags.executable: + bytes_per_row = 4 + highlight = None + if end is None: + end = start + (bytes_per_row * (rows // 2)) + highlight = start + start = start - (bytes_per_row * (rows // 2)) + if self.flags.executable: + print(FMT_MEM + "{}, viewing {} instructions:".format( + self, (end - start) // 4 + ) + FMT_NONE) + + for addr in range(start, end, 4): + if addr == highlight: + print(FMT_UNDERLINE + FMT_ORANGE, end='') + print("0x{:x}: {}{}".format( + self.base + addr, self.read_ins(addr), FMT_NONE + )) + else: + print(FMT_MEM + "{}, viewing {} bytes:".format( + self, (end - start) + ) + FMT_NONE) + + for addr in range(start, end, bytes_per_row): + hi_ind = (highlight - addr) // group + print("0x{:x}: {}{}".format( + self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE + )) + + def __repr__(self): + return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format( + self.__class__.__name__, + self.name, + self.base, + self.size, + self.flags, + self.owner + ) + + +class Program: + name: str + context: InstructionContext + global_labels: Set[str] + sections: List[MemorySection] + base: T_AbsoluteAddress = 0 + + def __init__(self, name: str, base: int = 0): + self.name = name + self.context = InstructionContext() + self.sections = [] + self.base = base + self.global_labels = set() + + def add_section(self, sec: MemorySection): + self.sections.append(sec) + + def __repr__(self): + return "{}(name={},context={},globals={},sections={},base={})".format( + self.__class__.__name__, self.name, self.context, self.global_labels, + [s.name for s in self.sections], self.base + ) diff --git a/riscemu/Config.py b/riscemu/config.py similarity index 85% rename from riscemu/Config.py rename to riscemu/config.py index 40ee2c5..7182958 100644 --- a/riscemu/Config.py +++ b/riscemu/config.py @@ -10,7 +10,7 @@ from typing import Optional @dataclass(frozen=True, init=True) class RunConfig: - stack_size: int = 8 * 1024 * 64 # for 8KB stack + stack_size: int = 8 * 1024 * 64 # for 8KB stack include_scall_symbols: bool = True add_accept_imm: bool = False # debugging @@ -21,3 +21,5 @@ class RunConfig: scall_fs: bool = False verbosity: int = 0 + +CONFIG = RunConfig() diff --git a/riscemu/debug.py b/riscemu/debug.py index 930dcbb..8350526 100644 --- a/riscemu/debug.py +++ b/riscemu/debug.py @@ -5,9 +5,9 @@ SPDX-License-Identifier: MIT """ import typing -from .Registers import Registers +from .registers import Registers from .colors import FMT_DEBUG, FMT_NONE -from .Executable import LoadedInstruction +from .base_types import Instruction from .helpers import * if typing.TYPE_CHECKING: @@ -50,7 +50,7 @@ def launch_debug_session(cpu: 'CPU', mmu: 'MMU', reg: 'Registers', prompt=""): return bin = mmu.get_bin_containing(cpu.pc) - ins = LoadedInstruction(name, list(args), bin) + ins = Instruction(name, list(args), bin) print(FMT_DEBUG + "Running instruction " + ins + FMT_NONE) cpu.run_instruction(ins) diff --git a/riscemu/Exceptions.py b/riscemu/exceptions.py similarity index 97% rename from riscemu/Exceptions.py rename to riscemu/exceptions.py index cfe4be8..b75b358 100644 --- a/riscemu/Exceptions.py +++ b/riscemu/exceptions.py @@ -7,11 +7,9 @@ SPDX-License-Identifier: MIT import typing from abc import abstractmethod +from .base_types import Instruction from .colors import * -if typing.TYPE_CHECKING: - from .Executable import LoadedInstruction - class RiscemuBaseException(BaseException): @abstractmethod @@ -116,7 +114,7 @@ class InvalidAllocationException(RiscemuBaseException): class UnimplementedInstruction(RiscemuBaseException): - def __init__(self, ins: 'LoadedInstruction'): + def __init__(self, ins: Instruction): self.ins = ins def message(self): diff --git a/riscemu/helpers.py b/riscemu/helpers.py index 8becedd..ad8ac06 100644 --- a/riscemu/helpers.py +++ b/riscemu/helpers.py @@ -5,7 +5,8 @@ SPDX-License-Identifier: MIT """ from math import log10, ceil -from .Exceptions import * +from .exceptions import * +from typing import Iterable, Iterator, TypeVar, Generic, List def align_addr(addr: int, to_bytes: int = 8) -> int: @@ -105,3 +106,36 @@ def bind_twos_complement(val): elif val > 2147483647: return val - 4294967296 return val + + +T = TypeVar('T') + + +class Peekable(Generic[T], Iterator[T]): + def __init__(self, iterable: Iterable[T]): + self.iterable = iter(iterable) + self.cache: List[T] = list() + + def __iter__(self) -> Iterator[T]: + return self + + def __next__(self) -> T: + if self.cache: + return self.cache.pop() + return next(self.iterable) + + def peek(self) -> T: + try: + if self.cache: + return self.cache[0] + pop = next(self.iterable) + self.cache.append(pop) + return pop + except StopIteration: + return None + + def push_back(self, item: T): + self.cache = [item] + self.cache + + def is_empty(self) -> bool: + return self.peek() is None diff --git a/riscemu/instructions/InstructionSet.py b/riscemu/instructions/InstructionSet.py index 6b55e7d..6666dc9 100644 --- a/riscemu/instructions/InstructionSet.py +++ b/riscemu/instructions/InstructionSet.py @@ -9,7 +9,7 @@ from typing import Tuple, Callable, Dict from abc import ABC from ..CPU import CPU from ..helpers import ASSERT_LEN, ASSERT_IN, to_unsigned -from ..Executable import LoadedInstruction +from ..base_types import LoadedInstruction class InstructionSet(ABC): diff --git a/riscemu/instructions/RV32A.py b/riscemu/instructions/RV32A.py index 9432c83..3de2383 100644 --- a/riscemu/instructions/RV32A.py +++ b/riscemu/instructions/RV32A.py @@ -1,5 +1,5 @@ from .InstructionSet import InstructionSet, LoadedInstruction -from ..Exceptions import INS_NOT_IMPLEMENTED +from ..exceptions import INS_NOT_IMPLEMENTED from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed diff --git a/riscemu/instructions/RV32I.py b/riscemu/instructions/RV32I.py index dcefb07..cb19af6 100644 --- a/riscemu/instructions/RV32I.py +++ b/riscemu/instructions/RV32I.py @@ -9,9 +9,9 @@ from .InstructionSet import * from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed from ..colors import FMT_DEBUG, FMT_NONE from ..debug import launch_debug_session -from ..Exceptions import LaunchDebuggerException -from ..Syscall import Syscall -from ..Executable import LoadedInstruction +from ..exceptions import LaunchDebuggerException +from ..syscall import Syscall +from ..base_types import LoadedInstruction class RV32I(InstructionSet): diff --git a/riscemu/instructions/RV32M.py b/riscemu/instructions/RV32M.py index bd0490f..5b1412f 100644 --- a/riscemu/instructions/RV32M.py +++ b/riscemu/instructions/RV32M.py @@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT """ from .InstructionSet import * -from ..Exceptions import INS_NOT_IMPLEMENTED +from ..exceptions import INS_NOT_IMPLEMENTED class RV32M(InstructionSet): diff --git a/riscemu/parser.py b/riscemu/parser.py new file mode 100644 index 0000000..64cefe8 --- /dev/null +++ b/riscemu/parser.py @@ -0,0 +1,79 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: MIT +""" +import re +from typing import Dict, Tuple, Iterable, Callable + +from helpers import Peekable +from .assembler import MemorySectionType, ParseContext, AssemblerDirectives +from .base_types import Program +from .colors import FMT_PARSE +from .exceptions import ParseException +from .tokenizer import Token, TokenType +from .types import SimpleInstruction + + +def parse_instruction(token: Token, args: Tuple[str], context: ParseContext): + if context.section is None or context.section.type != MemorySectionType.Instructions: + raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context)) + ins = SimpleInstruction(token.value, args, context.context, context.section.current_address()) + context.section.data.append(ins) + + +def parse_label(token: Token, args: Tuple[str], context: ParseContext): + name = token.value[:-1] + if re.match(r'^\d+$', name): + # relative label: + context.context.numbered_labels[name].append(context.section.current_address()) + else: + if name in context.context.labels: + print(FMT_PARSE + 'Warn: Symbol {} defined twice!'.format(name)) + context.context.labels[name] = context.section.current_address() + + +PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = { + TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction, + TokenType.LABEL: parse_label, + TokenType.INSTRUCTION_NAME: parse_instruction +} + + +def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program: + context = ParseContext(name) + + for token, args in composite_tokenizer(Peekable[Token](tokens_iter)): + if token.type not in PARSERS: + raise ParseException("Unexpected token type: {}, {}".format(token, args)) + PARSERS[token.type](token, args, context) + + return context.finalize() + + +def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]: + tokens: Peekable[Token] = Peekable[Token](tokens_iter) + + while not tokens.is_empty(): + token = next(tokens) + if token.type in (TokenType.PSEUDO_OP, TokenType.LABEL, TokenType.INSTRUCTION_NAME): + yield token, tuple(take_arguments(tokens)) + + +def take_arguments(tokens: Peekable[Token]) -> Iterable[str]: + """ + Consumes (argument comma)* and yields argument.value until newline is reached + If an argument is not followed by either a newline or a comma, a parse exception is raised + The newline at the end is consumed + :param tokens: A Peekable iterator over some Tokens + """ + while True: + if tokens.peek().type == TokenType.ARGUMENT: + yield next(tokens).value + if tokens.peek().type == TokenType.COMMA: + next(tokens) + elif tokens.peek().type == TokenType.NEWLINE: + next(tokens) + break + raise ParseException("Expected newline, instead got {}".format(tokens.peek())) + diff --git a/riscemu/priv/ElfLoader.py b/riscemu/priv/ElfLoader.py index 886da8a..3a4bc33 100644 --- a/riscemu/priv/ElfLoader.py +++ b/riscemu/priv/ElfLoader.py @@ -2,8 +2,8 @@ from dataclasses import dataclass from typing import List, Dict, Tuple from .Exceptions import * -from ..Exceptions import RiscemuBaseException -from ..Executable import MemoryFlags, LoadedMemorySection +from ..exceptions import RiscemuBaseException +from ..base_types import MemoryFlags, LoadedMemorySection from ..decoder import decode, RISCV_REGS, format_ins from ..helpers import FMT_PARSE, FMT_NONE, FMT_GREEN, FMT_BOLD diff --git a/riscemu/priv/ImageLoader.py b/riscemu/priv/ImageLoader.py index 36bbdce..1e89eee 100644 --- a/riscemu/priv/ImageLoader.py +++ b/riscemu/priv/ImageLoader.py @@ -8,8 +8,8 @@ from typing import Dict, List, Optional, TYPE_CHECKING from .ElfLoader import ElfInstruction, ElfLoadedMemorySection, InstructionAccessFault, InstructionAddressMisalignedTrap from .PrivMMU import PrivMMU -from ..Config import RunConfig -from ..Executable import LoadedMemorySection, MemoryFlags +from ..config import RunConfig +from ..base_types import LoadedMemorySection, MemoryFlags from ..IO.IOModule import IOModule from ..colors import FMT_ERROR, FMT_NONE, FMT_MEM from ..decoder import decode @@ -117,7 +117,7 @@ class MemoryImageMMU(PrivMMU): return "{}{:+x} ({}:{})".format(sym, addr - val, sec.owner, sec.name) return "{}:{}{:+x}".format(sec.owner, sec.name, addr - sec.base) - def symbol(self, symb: str): + def label(self, symb: str): print(FMT_MEM + "Looking up symbol {}".format(symb)) for owner, symbs in self.debug_info['symbols'].items(): if symb in symbs: diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index 043eb85..5297060 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -15,7 +15,7 @@ from ..IO import TextIO from ..instructions import RV32A, RV32M if typing.TYPE_CHECKING: - from riscemu import Executable, LoadedExecutable, LoadedInstruction + from riscemu import base_types, LoadedExecutable, LoadedInstruction from riscemu.instructions.InstructionSet import InstructionSet @@ -95,7 +95,7 @@ class PrivCPU(CPU): print() print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE) - def load(self, e: riscemu.Executable): + def load(self, e: riscemu.base_types): raise NotImplementedError("Not supported!") def run_loaded(self, le: 'riscemu.LoadedExecutable'): diff --git a/riscemu/priv/PrivRV32I.py b/riscemu/priv/PrivRV32I.py index ca91f37..278767d 100644 --- a/riscemu/priv/PrivRV32I.py +++ b/riscemu/priv/PrivRV32I.py @@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT """ from ..instructions.RV32I import * -from ..Exceptions import INS_NOT_IMPLEMENTED +from ..exceptions import INS_NOT_IMPLEMENTED from .Exceptions import * from .privmodes import PrivModes from ..colors import FMT_CPU, FMT_NONE diff --git a/riscemu/Registers.py b/riscemu/registers.py similarity index 98% rename from riscemu/Registers.py rename to riscemu/registers.py index df282c1..a3de09a 100644 --- a/riscemu/Registers.py +++ b/riscemu/registers.py @@ -4,10 +4,10 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -from .Config import RunConfig +from .config import RunConfig from .helpers import * from collections import defaultdict -from .Exceptions import InvalidRegisterException +from .exceptions import InvalidRegisterException class Registers: """ diff --git a/riscemu/Syscall.py b/riscemu/syscall.py similarity index 100% rename from riscemu/Syscall.py rename to riscemu/syscall.py diff --git a/riscemu/Tokenizer.py b/riscemu/tokenizer.py similarity index 94% rename from riscemu/Tokenizer.py rename to riscemu/tokenizer.py index db9d330..6e29dd7 100644 --- a/riscemu/Tokenizer.py +++ b/riscemu/tokenizer.py @@ -10,15 +10,16 @@ from enum import Enum, auto from typing import List, Iterable from riscemu.decoder import RISCV_REGS -from .Exceptions import ParseException +from .exceptions import ParseException LINE_COMMENT_STARTERS = ('#', ';', '//') WHITESPACE_PATTERN = re.compile(r'\s+') -MEMORY_ADDRESS_PATTERN = re.compile('^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$') +MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$') REGISTER_NAMES = RISCV_REGS I = lambda x: x + class TokenType(Enum): COMMA = auto() ARGUMENT = auto() @@ -40,6 +41,7 @@ class Token: return ', ' return '{}({}) '.format(self.type.name[0:3], self.value) + NEWLINE = Token(TokenType.NEWLINE, '\n') COMMA = Token(TokenType.COMMA, ',') diff --git a/riscemu/types.py b/riscemu/types.py new file mode 100644 index 0000000..49791b2 --- /dev/null +++ b/riscemu/types.py @@ -0,0 +1,72 @@ +from typing import List, Tuple +from .exceptions import MemoryAccessException +from .helpers import parse_numeric_argument +from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \ + T_AbsoluteAddress + + +class SimpleInstruction(Instruction): + def __init__(self, name: str, args: Tuple[str], context: InstructionContext, addr: T_RelativeAddress): + self.context = context + self.name = name + self.args = args + self.addr = addr + + def get_imm(self, num: int) -> int: + resolved_label = self.context.resolve_label(self.args[num], self.addr) + if resolved_label is None: + return parse_numeric_argument(self.args[num]) + return resolved_label + + def get_imm_reg(self, num: int) -> Tuple[int, str]: + return self.get_imm(num + 1), self.get_reg(num) + + def get_reg(self, num: int) -> str: + return self.args[num] + + +class InstructionMemorySection(MemorySection): + def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, base: int = 0): + self.name = name + self.base = base + self.context = context + self.size = len(instructions) * 4 + self.flags = MemoryFlags(True, True) + self.instructions = instructions + + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read') + + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write') + + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + if offset % 4 != 0: + raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch') + return self.instructions[offset // 4] + + +class BinaryDataMemorySection(MemorySection): + def __init__(self, data: bytearray, name: str, context: InstructionContext, base: int = 0): + self.name = name + self.base = base + self.context = context + self.size = len(data) + self.flags = MemoryFlags(False, False) + self.data = data + + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + if offset + size > self.size: + raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read') + return self.data[offset:offset + size] + + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + if offset + size > self.size: + raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write') + if len(data[0:size]) != size: + raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write') + self.data[offset:offset + size] = data[0:size] + + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self), + offset, 4, 'instruction fetch') diff --git a/test/test_tokenizer.py b/test/test_tokenizer.py index 659abf1..dc6c410 100644 --- a/test/test_tokenizer.py +++ b/test/test_tokenizer.py @@ -1,6 +1,6 @@ from unittest import TestCase -from riscemu.Tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA +from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA def ins(name: str) -> Token: From 0488a9d6bc3efdb23e51eeae9f67573a58164ee6 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Tue, 18 Jan 2022 21:08:07 +0100 Subject: [PATCH 04/30] finished basic RISC-V parser --- .idea/riscemu.iml | 1 + riscemu/__init__.py | 4 +- riscemu/assembler.py | 71 +++++++++++++------ riscemu/base_types.py | 8 ++- riscemu/exceptions.py | 4 +- riscemu/helpers.py | 4 +- riscemu/instructions/InstructionSet.py | 17 ++--- riscemu/instructions/RV32A.py | 24 +++---- riscemu/instructions/RV32I.py | 96 +++++++++++++------------- riscemu/instructions/RV32M.py | 16 ++--- riscemu/parser.py | 27 +++++++- riscemu/tokenizer.py | 47 +++++++++++-- riscemu/types.py | 8 ++- test/__init__.py | 2 + test/test_helpers.py | 2 +- test/test_tokenizer.py | 49 ++++++++++++- 16 files changed, 260 insertions(+), 120 deletions(-) diff --git a/.idea/riscemu.iml b/.idea/riscemu.iml index 74d515a..8ed6672 100644 --- a/.idea/riscemu.iml +++ b/.idea/riscemu.iml @@ -2,6 +2,7 @@ + diff --git a/riscemu/__init__.py b/riscemu/__init__.py index d580e2b..6319b86 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -11,7 +11,7 @@ It contains everything needed to run assembly files, so you don't need any custo from .exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException -from .base_types import Executable, LoadedExecutable, LoadedMemorySection +#from .base_types import Executable, LoadedExecutable, LoadedMemorySection from .instructions import * @@ -22,6 +22,8 @@ from .CPU import CPU from .config import RunConfig +from .parser import tokenize, parse_tokens, parse_program_from_file + __author__ = "Anton Lydike " __copyright__ = "Copyright 2021 Anton Lydike" __version__ = '1.0.0' \ No newline at end of file diff --git a/riscemu/assembler.py b/riscemu/assembler.py index c8c7546..1ec9731 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -1,16 +1,14 @@ -from typing import Optional, Tuple, Union +from typing import Optional, Tuple, Union, List from enum import Enum, auto from typing import Optional, Tuple, Union -from helpers import parse_numeric_argument -from .base_types import Program, T_RelativeAddress, InstructionContext +from .helpers import parse_numeric_argument, align_addr, int_to_bytes +from .base_types import Program, T_RelativeAddress, InstructionContext, Instruction from .colors import FMT_PARSE, FMT_NONE -from .exceptions import ParseException -from .helpers import ASSERT_LEN +from .exceptions import ParseException, ASSERT_LEN, ASSERT_NOT_NULL from .tokenizer import Token from .types import BinaryDataMemorySection, InstructionMemorySection - INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini') @@ -21,13 +19,25 @@ class MemorySectionType(Enum): class CurrentSection: name: str - data: Union[list, bytearray] + data: Union[List[Instruction], bytearray] type: MemorySectionType + base: int + + def __init__(self, name: str, type: MemorySectionType, base: int = 0): + self.name = name + self.type = type + self.base = base + if self.type == MemorySectionType.Data: + self.data = bytearray() + elif self.type == MemorySectionType.Instructions: + self.data = list() + else: + raise ParseException("Unknown section type: {}".format(type)) def current_address(self) -> T_RelativeAddress: if self.type == MemorySectionType.Data: - return len(self.data) - return len(self.data) * 4 + return len(self.data) + self.base + return len(self.data) * 4 + self.base def __repr__(self): return "{}(name={},data={},type={})".format( @@ -47,18 +57,27 @@ class ParseContext: self.section = None def finalize(self) -> Program: - self.finalize_section() + self._finalize_section() return self.program - def finalize_section(self): + def _finalize_section(self): if self.section is None: return if self.section.type == MemorySectionType.Data: - section = BinaryDataMemorySection(self.section.data, self.section.name, self.context) + section = BinaryDataMemorySection(self.section.data, self.section.name, self.context, self.program) self.program.add_section(section) elif self.section.type == MemorySectionType.Instructions: - section = InstructionMemorySection(self.section.data, self.section.name, self.context) + section = InstructionMemorySection(self.section.data, self.section.name, self.context, self.program) self.program.add_section(section) + self.section = None + + def new_section(self, name: str, type: MemorySectionType): + base = 0 + if self.section is not None: + base = align_addr(self.section.current_address(), 4) + print("base at {}".format(base)) + self._finalize_section() + self.section = CurrentSection(name, type, base) def __repr__(self): return "{}(\n\tsetion={},\n\tprogram={}\n)".format( @@ -100,21 +119,20 @@ class AssemblerDirectives: @classmethod def op_section(cls, token: Token, args: Tuple[str], context: ParseContext): ASSERT_LEN(args, 1) - context.finalize_section() - if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES: - context.section.type = MemorySectionType.Instructions - context.section.data = list() + context.new_section(args[0], MemorySectionType.Instructions) else: - context.section.type = MemorySectionType.Data - context.section.data = bytearray() - context.section.name = args[0] + context.new_section(args[0], MemorySectionType.Data) @classmethod def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext): ASSERT_LEN(args, 1) context.program.global_labels.add(args[0]) + @classmethod + def op_global(cls, token: Token, args: Tuple[str], context: ParseContext): + cls.op_globl(token, args, context) + @classmethod def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext): ASSERT_LEN(args, 2) @@ -122,6 +140,14 @@ class AssemblerDirectives: value = parse_numeric_argument(args[1]) context.context.labels[name] = value + @classmethod + def op_space(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 1) + ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data) + + size = parse_numeric_argument(args[0]) + cls.add_bytes(size, None, context) + @classmethod def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext): ASSERT_LEN(args, 1) @@ -130,11 +156,14 @@ class AssemblerDirectives: cls.add_bytes(size, bytearray(size), context) @classmethod - def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext): + def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext, unsigned=False): ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data) if content is None: content = bytearray(size) + if isinstance(context, int): + content = int_to_bytes(content, size, unsigned) + context.section.data += content @classmethod def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True): diff --git a/riscemu/base_types.py b/riscemu/base_types.py index 0bf92b7..43dfb73 100644 --- a/riscemu/base_types.py +++ b/riscemu/base_types.py @@ -7,12 +7,14 @@ This file contains base classes which represent loaded programs """ import re -from abc import ABC +from abc import ABC, abstractmethod +from collections import defaultdict from dataclasses import dataclass from typing import Dict, List, Optional, Tuple, Set -from collections import defaultdict -from .helpers import * +from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE +from .exceptions import ParseException +from .helpers import format_bytes T_RelativeAddress = int T_AbsoluteAddress = int diff --git a/riscemu/exceptions.py b/riscemu/exceptions.py index b75b358..fd6f130 100644 --- a/riscemu/exceptions.py +++ b/riscemu/exceptions.py @@ -4,8 +4,6 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -import typing - from abc import abstractmethod from .base_types import Instruction from .colors import * @@ -21,7 +19,7 @@ class RiscemuBaseException(BaseException): class ParseException(RiscemuBaseException): def __init__(self, msg, data=None): - super().__init__() + super().__init__(msg, data) self.msg = msg self.data = data diff --git a/riscemu/helpers.py b/riscemu/helpers.py index ad8ac06..9946b5d 100644 --- a/riscemu/helpers.py +++ b/riscemu/helpers.py @@ -6,7 +6,7 @@ SPDX-License-Identifier: MIT from math import log10, ceil from .exceptions import * -from typing import Iterable, Iterator, TypeVar, Generic, List +from typing import Iterable, Iterator, TypeVar, Generic, List, Optional def align_addr(addr: int, to_bytes: int = 8) -> int: @@ -124,7 +124,7 @@ class Peekable(Generic[T], Iterator[T]): return self.cache.pop() return next(self.iterable) - def peek(self) -> T: + def peek(self) -> Optional[T]: try: if self.cache: return self.cache[0] diff --git a/riscemu/instructions/InstructionSet.py b/riscemu/instructions/InstructionSet.py index 6666dc9..b6a19b7 100644 --- a/riscemu/instructions/InstructionSet.py +++ b/riscemu/instructions/InstructionSet.py @@ -8,8 +8,9 @@ from typing import Tuple, Callable, Dict from abc import ABC from ..CPU import CPU -from ..helpers import ASSERT_LEN, ASSERT_IN, to_unsigned -from ..base_types import LoadedInstruction +from ..helpers import to_unsigned +from ..exceptions import ASSERT_LEN, ASSERT_IN +from ..base_types import Instruction class InstructionSet(ABC): @@ -30,7 +31,7 @@ class InstructionSet(ABC): self.name = self.__class__.__name__ self.cpu = cpu - def load(self) -> Dict[str, Callable[['LoadedInstruction'], None]]: + def load(self) -> Dict[str, Callable[['Instruction'], None]]: """ This is called by the CPU once it instantiates this instruction set @@ -51,7 +52,7 @@ class InstructionSet(ABC): if member.startswith('instruction_'): yield member[12:].replace('_', '.'), getattr(self, member) - def parse_mem_ins(self, ins: 'LoadedInstruction') -> Tuple[str, int]: + def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, int]: """ parses both rd, rs, imm and rd, imm(rs) argument format and returns (rd, imm+rs1) (so a register and address tuple for memory instructions) @@ -69,7 +70,7 @@ class InstructionSet(ABC): rd = ins.get_reg(0) return rd, rs + imm - def parse_rd_rs_rs(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]: + def parse_rd_rs_rs(self, ins: 'Instruction', signed=True) -> Tuple[str, int, int]: """ Assumes the command is in rd, rs1, rs2 format Returns the name of rd, and the values in rs1 and rs2 @@ -84,7 +85,7 @@ class InstructionSet(ABC): to_unsigned(self.get_reg_content(ins, 1)), \ to_unsigned(self.get_reg_content(ins, 2)) - def parse_rd_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]: + def parse_rd_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[str, int, int]: """ Assumes the command is in rd, rs, imm format Returns the name of rd, the value in rs and the immediate imm @@ -99,7 +100,7 @@ class InstructionSet(ABC): to_unsigned(self.get_reg_content(ins, 1)), \ to_unsigned(ins.get_imm(2)) - def parse_rs_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[int, int, int]: + def parse_rs_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[int, int, int]: """ Assumes the command is in rs1, rs2, imm format Returns the values in rs1, rs2 and the immediate imm @@ -113,7 +114,7 @@ class InstructionSet(ABC): to_unsigned(self.get_reg_content(ins, 1)), \ to_unsigned(ins.get_imm(2)) - def get_reg_content(self, ins: 'LoadedInstruction', ind: int) -> int: + def get_reg_content(self, ins: 'Instruction', ind: int) -> int: """ get the register name from ins and then return the register contents """ diff --git a/riscemu/instructions/RV32A.py b/riscemu/instructions/RV32A.py index 3de2383..ba6a8a6 100644 --- a/riscemu/instructions/RV32A.py +++ b/riscemu/instructions/RV32A.py @@ -1,4 +1,4 @@ -from .InstructionSet import InstructionSet, LoadedInstruction +from .InstructionSet import InstructionSet, Instruction from ..exceptions import INS_NOT_IMPLEMENTED from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed @@ -10,13 +10,13 @@ class RV32A(InstructionSet): for this? """ - def instruction_lr_w(self, ins: 'LoadedInstruction'): + def instruction_lr_w(self, ins: 'Instruction'): INS_NOT_IMPLEMENTED(ins) - def instruction_sc_w(self, ins: 'LoadedInstruction'): + def instruction_sc_w(self, ins: 'Instruction'): INS_NOT_IMPLEMENTED(ins) - def instruction_amoswap_w(self, ins: 'LoadedInstruction'): + def instruction_amoswap_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) if dest == 'zero': self.mmu.write(addr, int_to_bytes(addr, 4)) @@ -25,37 +25,37 @@ class RV32A(InstructionSet): self.mmu.write(addr, int_to_bytes(val, 4)) self.regs.set(dest, old) - def instruction_amoadd_w(self, ins: 'LoadedInstruction'): + def instruction_amoadd_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) old = int_from_bytes(self.mmu.read(addr, 4)) self.mmu.write(addr, int_to_bytes(old + val, 4)) self.regs.set(dest, old) - def instruction_amoand_w(self, ins: 'LoadedInstruction'): + def instruction_amoand_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) old = int_from_bytes(self.mmu.read(addr, 4)) self.mmu.write(addr, int_to_bytes(old & val, 4)) self.regs.set(dest, old) - def instruction_amoor_w(self, ins: 'LoadedInstruction'): + def instruction_amoor_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) old = int_from_bytes(self.mmu.read(addr, 4)) self.mmu.write(addr, int_to_bytes(old | val, 4)) self.regs.set(dest, old) - def instruction_amoxor_w(self, ins: 'LoadedInstruction'): + def instruction_amoxor_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) old = int_from_bytes(self.mmu.read(addr, 4)) self.mmu.write(addr, int_to_bytes(old ^ val, 4)) self.regs.set(dest, old) - def instruction_amomax_w(self, ins: 'LoadedInstruction'): + def instruction_amomax_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) old = int_from_bytes(self.mmu.read(addr, 4)) self.mmu.write(addr, int_to_bytes(max(old, val), 4)) self.regs.set(dest, old) - def instruction_amomaxu_w(self, ins: 'LoadedInstruction'): + def instruction_amomaxu_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) val = to_unsigned(val) old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True) @@ -63,13 +63,13 @@ class RV32A(InstructionSet): self.mmu.write(addr, int_to_bytes(to_signed(max(old, val)), 4)) self.regs.set(dest, old) - def instruction_amomin_w(self, ins: 'LoadedInstruction'): + def instruction_amomin_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) old = int_from_bytes(self.mmu.read(addr, 4)) self.mmu.write(addr, int_to_bytes(min(old, val), 4)) self.regs.set(dest, old) - def instruction_amominu_w(self, ins: 'LoadedInstruction'): + def instruction_amominu_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) val = to_unsigned(val) old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True) diff --git a/riscemu/instructions/RV32I.py b/riscemu/instructions/RV32I.py index cb19af6..5a30b5f 100644 --- a/riscemu/instructions/RV32I.py +++ b/riscemu/instructions/RV32I.py @@ -11,7 +11,7 @@ from ..colors import FMT_DEBUG, FMT_NONE from ..debug import launch_debug_session from ..exceptions import LaunchDebuggerException from ..syscall import Syscall -from ..base_types import LoadedInstruction +from ..base_types import Instruction class RV32I(InstructionSet): @@ -23,39 +23,39 @@ class RV32I(InstructionSet): See https://maxvytech.com/images/RV32I-11-2018.pdf for a more detailed overview """ - def instruction_lb(self, ins: 'LoadedInstruction'): + def instruction_lb(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1))) - def instruction_lh(self, ins: 'LoadedInstruction'): + def instruction_lh(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2))) - def instruction_lw(self, ins: 'LoadedInstruction'): + def instruction_lw(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 4))) - def instruction_lbu(self, ins: 'LoadedInstruction'): + def instruction_lbu(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1), unsigned=True)) - def instruction_lhu(self, ins: 'LoadedInstruction'): + def instruction_lhu(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2), unsigned=True)) - def instruction_sb(self, ins: 'LoadedInstruction'): + def instruction_sb(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) self.mmu.write(addr, 1, int_to_bytes(self.regs.get(rd), 1)) - def instruction_sh(self, ins: 'LoadedInstruction'): + def instruction_sh(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) self.mmu.write(addr, 2, int_to_bytes(self.regs.get(rd), 2)) - def instruction_sw(self, ins: 'LoadedInstruction'): + def instruction_sw(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) self.mmu.write(addr, 4, int_to_bytes(self.regs.get(rd), 4)) - def instruction_sll(self, ins: 'LoadedInstruction'): + def instruction_sll(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) dst = ins.get_reg(0) src1 = ins.get_reg(1) @@ -65,7 +65,7 @@ class RV32I(InstructionSet): to_signed(to_unsigned(self.regs.get(src1)) << (self.regs.get(src2) & 0b11111)) ) - def instruction_slli(self, ins: 'LoadedInstruction'): + def instruction_slli(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) dst = ins.get_reg(0) src1 = ins.get_reg(1) @@ -75,7 +75,7 @@ class RV32I(InstructionSet): to_signed(to_unsigned(self.regs.get(src1)) << (imm & 0b11111)) ) - def instruction_srl(self, ins: 'LoadedInstruction'): + def instruction_srl(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) dst = ins.get_reg(0) src1 = ins.get_reg(1) @@ -85,7 +85,7 @@ class RV32I(InstructionSet): to_signed(to_unsigned(self.regs.get(src1)) >> (self.regs.get(src2) & 0b11111)) ) - def instruction_srli(self, ins: 'LoadedInstruction'): + def instruction_srli(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) dst = ins.get_reg(0) src1 = ins.get_reg(1) @@ -95,7 +95,7 @@ class RV32I(InstructionSet): to_signed(to_unsigned(self.regs.get(src1)) >> (imm & 0b11111)) ) - def instruction_sra(self, ins: 'LoadedInstruction'): + def instruction_sra(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) dst = ins.get_reg(0) src1 = ins.get_reg(1) @@ -105,7 +105,7 @@ class RV32I(InstructionSet): self.regs.get(src1) >> (self.regs.get(src2) & 0b11111) ) - def instruction_srai(self, ins: 'LoadedInstruction'): + def instruction_srai(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) dst = ins.get_reg(0) src1 = ins.get_reg(1) @@ -115,7 +115,7 @@ class RV32I(InstructionSet): self.regs.get(src1) >> (imm & 0b11111) ) - def instruction_add(self, ins: 'LoadedInstruction'): + def instruction_add(self, ins: 'Instruction'): dst = "" if self.cpu.conf.add_accept_imm: try: @@ -130,139 +130,139 @@ class RV32I(InstructionSet): rs1 + rs2 ) - def instruction_addi(self, ins: 'LoadedInstruction'): + def instruction_addi(self, ins: 'Instruction'): dst, rs1, imm = self.parse_rd_rs_imm(ins) self.regs.set( dst, rs1 + imm ) - def instruction_sub(self, ins: 'LoadedInstruction'): + def instruction_sub(self, ins: 'Instruction'): dst, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( dst, rs1 - rs2 ) - def instruction_lui(self, ins: 'LoadedInstruction'): + def instruction_lui(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) imm = ins.get_imm(1) self.regs.set(reg, imm << 12) - def instruction_auipc(self, ins: 'LoadedInstruction'): + def instruction_auipc(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) imm = to_unsigned(ins.get_imm(1)) self.regs.set(reg, self.pc + (imm << 12)) - def instruction_xor(self, ins: 'LoadedInstruction'): + def instruction_xor(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( rd, rs1 ^ rs2 ) - def instruction_xori(self, ins: 'LoadedInstruction'): + def instruction_xori(self, ins: 'Instruction'): rd, rs1, imm = self.parse_rd_rs_imm(ins) self.regs.set( rd, rs1 ^ imm ) - def instruction_or(self, ins: 'LoadedInstruction'): + def instruction_or(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( rd, rs1 | rs2 ) - def instruction_ori(self, ins: 'LoadedInstruction'): + def instruction_ori(self, ins: 'Instruction'): rd, rs1, imm = self.parse_rd_rs_imm(ins) self.regs.set( rd, rs1 | imm ) - def instruction_and(self, ins: 'LoadedInstruction'): + def instruction_and(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( rd, rs1 & rs2 ) - def instruction_andi(self, ins: 'LoadedInstruction'): + def instruction_andi(self, ins: 'Instruction'): rd, rs1, imm = self.parse_rd_rs_imm(ins) self.regs.set( rd, rs1 & imm ) - def instruction_slt(self, ins: 'LoadedInstruction'): + def instruction_slt(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( rd, int(rs1 < rs2) ) - def instruction_slti(self, ins: 'LoadedInstruction'): + def instruction_slti(self, ins: 'Instruction'): rd, rs1, imm = self.parse_rd_rs_imm(ins) self.regs.set( rd, int(rs1 < imm) ) - def instruction_sltu(self, ins: 'LoadedInstruction'): + def instruction_sltu(self, ins: 'Instruction'): dst, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False) self.regs.set( dst, int(rs1 < rs2) ) - def instruction_sltiu(self, ins: 'LoadedInstruction'): + def instruction_sltiu(self, ins: 'Instruction'): dst, rs1, imm = self.parse_rd_rs_imm(ins, signed=False) self.regs.set( dst, int(rs1 < imm) ) - def instruction_beq(self, ins: 'LoadedInstruction'): + def instruction_beq(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 == rs2: self.pc = dst - def instruction_bne(self, ins: 'LoadedInstruction'): + def instruction_bne(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 != rs2: self.pc = dst - def instruction_blt(self, ins: 'LoadedInstruction'): + def instruction_blt(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 < rs2: self.pc = dst - def instruction_bge(self, ins: 'LoadedInstruction'): + def instruction_bge(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 >= rs2: self.pc = dst - def instruction_bltu(self, ins: 'LoadedInstruction'): + def instruction_bltu(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False) if rs1 < rs2: self.pc = dst - def instruction_bgeu(self, ins: 'LoadedInstruction'): + def instruction_bgeu(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False) if rs1 >= rs2: self.pc = dst # technically deprecated - def instruction_j(self, ins: 'LoadedInstruction'): + def instruction_j(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 1) addr = ins.get_imm(0) self.pc = addr - def instruction_jal(self, ins: 'LoadedInstruction'): + def instruction_jal(self, ins: 'Instruction'): reg = 'ra' # default register is ra if len(ins.args) == 1: addr = ins.get_imm(0) @@ -273,29 +273,29 @@ class RV32I(InstructionSet): self.regs.set(reg, self.pc) self.pc = addr - def instruction_jalr(self, ins: 'LoadedInstruction'): + def instruction_jalr(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) addr = ins.get_imm(1) self.regs.set(reg, self.pc) self.pc = addr - def instruction_ret(self, ins: 'LoadedInstruction'): + def instruction_ret(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 0) self.pc = self.regs.get('ra') - def instruction_ecall(self, ins: 'LoadedInstruction'): + def instruction_ecall(self, ins: 'Instruction'): self.instruction_scall(ins) - def instruction_ebreak(self, ins: 'LoadedInstruction'): + def instruction_ebreak(self, ins: 'Instruction'): self.instruction_sbreak(ins) - def instruction_scall(self, ins: 'LoadedInstruction'): + def instruction_scall(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 0) syscall = Syscall(self.regs.get('a7'), self.cpu) self.cpu.syscall_int.handle_syscall(syscall) - def instruction_sbreak(self, ins: 'LoadedInstruction'): + def instruction_sbreak(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 0) if self.cpu.active_debug: print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE) @@ -307,23 +307,23 @@ class RV32I(InstructionSet): "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) ) - def instruction_nop(self, ins: 'LoadedInstruction'): + def instruction_nop(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 0) pass - def instruction_li(self, ins: 'LoadedInstruction'): + def instruction_li(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) immediate = ins.get_imm(1) self.regs.set(reg, immediate) - def instruction_la(self, ins: 'LoadedInstruction'): + def instruction_la(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) immediate = ins.get_imm(1) self.regs.set(reg, immediate) - def instruction_mv(self, ins: 'LoadedInstruction'): + def instruction_mv(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) rd, rs = ins.get_reg(0), ins.get_reg(1) self.regs.set(rd, self.regs.get(rs)) diff --git a/riscemu/instructions/RV32M.py b/riscemu/instructions/RV32M.py index 5b1412f..31b9341 100644 --- a/riscemu/instructions/RV32M.py +++ b/riscemu/instructions/RV32M.py @@ -12,48 +12,48 @@ class RV32M(InstructionSet): """ The RV32M Instruction set, containing multiplication and division instructions """ - def instruction_mul(self, ins: 'LoadedInstruction'): + def instruction_mul(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( rd, rs1 * rs2 ) - def instruction_mulh(self, ins: 'LoadedInstruction'): + def instruction_mulh(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( rd, (rs1 * rs2) >> 32 ) - def instruction_mulhsu(self, ins: 'LoadedInstruction'): + def instruction_mulhsu(self, ins: 'Instruction'): INS_NOT_IMPLEMENTED(ins) - def instruction_mulhu(self, ins: 'LoadedInstruction'): + def instruction_mulhu(self, ins: 'Instruction'): INS_NOT_IMPLEMENTED(ins) - def instruction_div(self, ins: 'LoadedInstruction'): + def instruction_div(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( rd, rs1 // rs2 ) - def instruction_divu(self, ins: 'LoadedInstruction'): + def instruction_divu(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False) self.regs.set( rd, rs1 // rs2 ) - def instruction_rem(self, ins: 'LoadedInstruction'): + def instruction_rem(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( rd, rs1 % rs2 ) - def instruction_remu(self, ins: 'LoadedInstruction'): + def instruction_remu(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False) self.regs.set( rd, diff --git a/riscemu/parser.py b/riscemu/parser.py index 64cefe8..7186d99 100644 --- a/riscemu/parser.py +++ b/riscemu/parser.py @@ -3,15 +3,16 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ +import os import re from typing import Dict, Tuple, Iterable, Callable -from helpers import Peekable +from .helpers import Peekable from .assembler import MemorySectionType, ParseContext, AssemblerDirectives from .base_types import Program from .colors import FMT_PARSE from .exceptions import ParseException -from .tokenizer import Token, TokenType +from .tokenizer import Token, TokenType, tokenize from .types import SimpleInstruction @@ -41,17 +42,32 @@ PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = { def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program: + """ + Convert a token stream into a parsed program + :param name: the programs name + :param tokens_iter: the programs content, tokenized + :return: a parsed program + """ context = ParseContext(name) for token, args in composite_tokenizer(Peekable[Token](tokens_iter)): if token.type not in PARSERS: raise ParseException("Unexpected token type: {}, {}".format(token, args)) + print("{} {}".format(token, args)) PARSERS[token.type](token, args, context) return context.finalize() def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]: + """ + Convert an iterator over tokens into an iterator over tuples: (token, list(token)) + + The first token ist either a pseudo_op, label, or instruction name. The token list are all remaining tokens before + a newline is encountered + :param tokens_iter: An iterator over tokens + :return: An iterator over a slightly more structured representation of the tokens + """ tokens: Peekable[Token] = Peekable[Token](tokens_iter) while not tokens.is_empty(): @@ -75,5 +91,10 @@ def take_arguments(tokens: Peekable[Token]) -> Iterable[str]: elif tokens.peek().type == TokenType.NEWLINE: next(tokens) break - raise ParseException("Expected newline, instead got {}".format(tokens.peek())) + break + #raise ParseException("Expected newline, instead got {}".format(tokens.peek())) + +def parse_program_from_file(path: str) -> Program: + with open(path, 'r') as f: + return parse_tokens(os.path.split(path)[-1], tokenize(f)) diff --git a/riscemu/tokenizer.py b/riscemu/tokenizer.py index 6e29dd7..2820a09 100644 --- a/riscemu/tokenizer.py +++ b/riscemu/tokenizer.py @@ -7,7 +7,7 @@ SPDX-License-Identifier: MIT import re from dataclasses import dataclass from enum import Enum, auto -from typing import List, Iterable +from typing import List, Iterable, Optional from riscemu.decoder import RISCV_REGS from .exceptions import ParseException @@ -17,8 +17,6 @@ WHITESPACE_PATTERN = re.compile(r'\s+') MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$') REGISTER_NAMES = RISCV_REGS -I = lambda x: x - class TokenType(Enum): COMMA = auto() @@ -39,7 +37,7 @@ class Token: return '\\n' if self.type == TokenType.COMMA: return ', ' - return '{}({}) '.format(self.type.name[0:3], self.value) + return '{}({})'.format(self.type.name[0:3], self.value) NEWLINE = Token(TokenType.NEWLINE, '\n') @@ -55,7 +53,7 @@ def tokenize(input: Iterable[str]) -> Iterable[Token]: if not line: continue - parts = list(part for part in re.split(WHITESPACE_PATTERN, line) if part) + parts = list(part for part in split_whitespace_respecting_quotes(line) if part) yield from parse_line(parts) yield NEWLINE @@ -70,6 +68,8 @@ def parse_line(parts: List[str]) -> Iterable[Token]: yield Token(TokenType.PSEUDO_OP, first_token) elif first_token[-1] == ':': yield Token(TokenType.LABEL, first_token) + yield from parse_line(parts[1:]) + return else: yield Token(TokenType.INSTRUCTION_NAME, first_token) @@ -100,3 +100,40 @@ def print_tokens(tokens: Iterable[Token]): for token in tokens: print(token, end='\n' if token == NEWLINE else '') print("", flush=True, end="") + + +def split_whitespace_respecting_quotes(line: str) -> Iterable[str]: + quote = "" + part = "" + for c in line: + if c == quote: + yield part + part = "" + quote = "" + continue + + if quote != "": + part += c + continue + + if c in "\"'": + if part: + yield part + quote = c + part = "" + continue + + if c in ' \t\n': + if part: + yield part + part = "" + continue + + part += c + + if part: + yield part + + + + diff --git a/riscemu/types.py b/riscemu/types.py index 49791b2..998eba1 100644 --- a/riscemu/types.py +++ b/riscemu/types.py @@ -2,7 +2,7 @@ from typing import List, Tuple from .exceptions import MemoryAccessException from .helpers import parse_numeric_argument from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \ - T_AbsoluteAddress + T_AbsoluteAddress, Program class SimpleInstruction(Instruction): @@ -26,13 +26,14 @@ class SimpleInstruction(Instruction): class InstructionMemorySection(MemorySection): - def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, base: int = 0): + def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: Program, base: int = 0): self.name = name self.base = base self.context = context self.size = len(instructions) * 4 self.flags = MemoryFlags(True, True) self.instructions = instructions + self.owner = owner.name def read(self, offset: T_RelativeAddress, size: int) -> bytearray: raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read') @@ -47,13 +48,14 @@ class InstructionMemorySection(MemorySection): class BinaryDataMemorySection(MemorySection): - def __init__(self, data: bytearray, name: str, context: InstructionContext, base: int = 0): + def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: Program, base: int = 0): self.name = name self.base = base self.context = context self.size = len(data) self.flags = MemoryFlags(False, False) self.data = data + self.owner = owner.name def read(self, offset: T_RelativeAddress, size: int) -> bytearray: if offset + size > self.size: diff --git a/test/__init__.py b/test/__init__.py index e69de29..8030002 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -0,0 +1,2 @@ +from .test_tokenizer import * +from .test_helpers import * \ No newline at end of file diff --git a/test/test_helpers.py b/test/test_helpers.py index bc8ef0d..60d93b0 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -3,7 +3,7 @@ from unittest import TestCase from riscemu.helpers import * -class Test(TestCase): +class TestHelpers(TestCase): def test_int_to_bytes(self): self.assertEqual(int_to_bytes(-1), bytearray([0xff] * 4), "-1") self.assertEqual(int_to_bytes(1), bytearray([0, 0, 0, 1]), "1") diff --git a/test/test_tokenizer.py b/test/test_tokenizer.py index dc6c410..9eed365 100644 --- a/test/test_tokenizer.py +++ b/test/test_tokenizer.py @@ -1,6 +1,7 @@ from unittest import TestCase -from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA +from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA, \ + split_whitespace_respecting_quotes def ins(name: str) -> Token: @@ -19,7 +20,7 @@ def lbl(name: str) -> Token: return Token(TokenType.LABEL, name) -class Test(TestCase): +class TestTokenizer(TestCase): def test_instructions(self): program = [ @@ -79,3 +80,47 @@ section: self.assertEqual(list(tokenize(program.splitlines())), tokens) + def test_split_whitespace_respecting_quotes_single(self): + self.assertEqual( + list(split_whitespace_respecting_quotes("test")), ["test"] + ) + + def test_split_whitespace_respecting_quotes_empty(self): + self.assertEqual( + list(split_whitespace_respecting_quotes("")), [] + ) + + def test_split_whitespace_respecting_quotes_two_parts(self): + self.assertEqual( + list(split_whitespace_respecting_quotes("test 123")), ["test", "123"] + ) + + def test_split_whitespace_respecting_quotes_whole_quoted(self): + self.assertEqual( + list(split_whitespace_respecting_quotes("'test 123'")), ["test 123"] + ) + + def test_split_whitespace_respecting_quotes_double_quotes(self): + self.assertEqual( + list(split_whitespace_respecting_quotes('"test 123"')), ["test 123"] + ) + + def test_split_whitespace_respecting_quotes_quoted_then_normal(self): + self.assertEqual( + list(split_whitespace_respecting_quotes('"test 123" abc')), ["test 123", "abc"] + ) + + def test_split_whitespace_respecting_quotes_quoted_sorrounded(self): + self.assertEqual( + list(split_whitespace_respecting_quotes('hello "test 123" abc')), ["hello", "test 123", "abc"] + ) + + def test_split_whitespace_respecting_quotes_weird_spaces(self): + self.assertEqual( + list(split_whitespace_respecting_quotes('hello "test 123"\tabc')), ["hello", "test 123", "abc"] + ) + + def test_split_whitespace_respecting_quotes_quotes_no_spaces(self): + self.assertEqual( + list(split_whitespace_respecting_quotes('hello"test 123"abc')), ["hello", "test 123", "abc"] + ) From 5538034f8b8e1b5a7f1c2c5a8b52801a810dca16 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Fri, 11 Feb 2022 13:32:02 +0100 Subject: [PATCH 05/30] started with base type overhaul --- LICENSE | 2 +- riscemu/CPU.py | 52 +-- riscemu/MMU.py | 84 ++++- riscemu/__init__.py | 2 +- riscemu/assembler.py | 28 +- riscemu/base.py | 81 +++++ riscemu/base_types.py | 188 ----------- riscemu/debug.py | 2 +- riscemu/exceptions.py | 7 +- riscemu/helpers.py | 6 + riscemu/instructions/InstructionSet.py | 2 +- riscemu/instructions/RV32I.py | 2 +- riscemu/parser.py | 43 ++- riscemu/priv/ElfLoader.py | 158 +++------ riscemu/priv/Exceptions.py | 36 +- riscemu/priv/ImageLoader.py | 192 ++++------- riscemu/priv/PrivCPU.py | 14 +- riscemu/priv/PrivRV32I.py | 44 +-- riscemu/priv/types.py | 140 ++++++++ riscemu/syscall.py | 9 +- riscemu/tokenizer.py | 4 - riscemu/types.py | 440 ++++++++++++++++++++++--- 22 files changed, 939 insertions(+), 597 deletions(-) create mode 100644 riscemu/base.py delete mode 100644 riscemu/base_types.py create mode 100644 riscemu/priv/types.py diff --git a/LICENSE b/LICENSE index e3f96f8..c650be9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2021 Anton Lydike +Copyright (c) 2021-2022 Anton Lydike Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 46a10ce..2b72a66 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -9,7 +9,7 @@ on them. import sys from typing import Tuple, List, Dict, Callable, Type -from .base_types import MemoryFlags +from .types import MemoryFlags from .syscall import SyscallInterface, get_syscall_symbols from .exceptions import RiscemuBaseException, LaunchDebuggerException from .MMU import MMU @@ -23,7 +23,7 @@ import riscemu import typing if typing.TYPE_CHECKING: - from . import base_types, LoadedExecutable, LoadedInstruction + from . import types, LoadedExecutable, LoadedInstruction from .instructions.InstructionSet import InstructionSet @@ -34,7 +34,7 @@ class CPU: It is initialized with a configuration and a list of instruction sets. """ - INS_XLEN = 1 + INS_XLEN = 4 def __init__(self, conf: RunConfig, instruction_sets: List[Type['riscemu.InstructionSet']]): """ @@ -70,34 +70,6 @@ class CPU: if conf.include_scall_symbols: self.mmu.global_symbols.update(get_syscall_symbols()) - def get_tokenizer(self, tokenizer_input): - """ - Returns a tokenizer that respects the language of the CPU - - :param tokenizer_input: an instance of the RiscVTokenizerInput class - """ - return RiscVTokenizer(tokenizer_input, self.all_instructions()) - - def load(self, e: riscemu.base_types): - """ - Load an executable into Memory - """ - return self.mmu.load_bin(e) - - def run_loaded(self, le: 'riscemu.LoadedExecutable'): - """ - Run a loaded executable - """ - self.pc = le.run_ptr - - if self.conf.stack_size > 0: - self.stack = self.mmu.allocate_section("stack", self.conf.stack_size, MemoryFlags(False, False)) - self.regs.set('sp', self.stack.base + self.stack.size) - print(FMT_CPU + '[CPU] Allocated {} bytes of stack'.format(self.stack.size) + FMT_NONE) - - print(FMT_CPU + '[CPU] Started running from 0x{:08X} ({})'.format(le.run_ptr, le.name) + FMT_NONE) - self._run() - def continue_from_debugger(self, verbose=True): """ called from the debugger to continue running @@ -157,24 +129,6 @@ class CPU: print() print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE) - def run_instruction(self, ins: 'LoadedInstruction'): - """ - Execute a single instruction - - :param ins: The instruction to execute - """ - if ins.name in self.instructions: - self.instructions[ins.name](ins) - else: - # this should never be reached, as unknown instructions are imparseable - raise RuntimeError("Unknown instruction: {}".format(ins)) - - def all_instructions(self) -> List[str]: - """ - Return a list of all instructions this CPU can execute. - """ - return list(self.instructions.keys()) - def __repr__(self): """ Returns a representation of the CPU and some of its state. diff --git a/riscemu/MMU.py b/riscemu/MMU.py index c255eb4..f5d0375 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -4,17 +4,20 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -from .base_types import InstructionContext, Instruction, MemorySection, MemoryFlags, T_RelativeAddress, T_AbsoluteAddress, \ - Program -from .helpers import align_addr, int_from_bytes -from .exceptions import OutOfMemoryException, InvalidAllocationException +from typing import Dict, List, Optional + from .colors import * -from typing import Dict, List, Tuple, Optional +from .exceptions import InvalidAllocationException +from .helpers import align_addr, int_from_bytes +from .types import Instruction, MemorySection, MemoryFlags, T_AbsoluteAddress, \ + Program class MMU: """ - The MemoryManagementUnit (handles loading binaries, and reading/writing data) + The MemoryManagementUnit. This provides a unified interface for reading/writing data from/to memory. + + It also provides various translations for addresses. """ max_size = 0xFFFFFFFF @@ -62,9 +65,9 @@ class MMU: return None def get_bin_containing(self, addr: T_AbsoluteAddress) -> Optional[Program]: - for exe in self.binaries: - if exe.base_addr <= addr < exe.base_addr + exe.size: - return exe + for program in self.programs: + if program.base <= addr < program.base + program.size: + return program return None def read_ins(self, addr: T_AbsoluteAddress) -> Instruction: @@ -140,7 +143,68 @@ class MMU: def read_int(self, addr: int) -> int: return int_from_bytes(self.read(addr, 4)) + def translate_address(self, address: T_AbsoluteAddress) -> str: + # FIXME: proper implementation using the debug info + return str(address) + + def has_continous_free_region(self, start: int, end: int) -> bool: + # if we have no sections we are all good + if len(self.sections) == 0: + return True + # if the last section is located before the start we are also good + if start > self.sections[-1].base + self.sections[-1].size: + return True + + for sec in self.sections: + # skip all sections that end before the required start point + if sec.base + sec.size < start: + continue + # we now have the first section that doesn't end **before** the start point + # if this section starts after the specified end, we are good + if sec.base > end: + return True + # otherwise we can't continue + return False + # if all sections end before the requested start we are good + # technically we shouldn't ever reach this point, but better safe than sorry + return True + + def load_program(self, program: Program, align_to: int = 4): + if program.base is not None: + if not self.has_continous_free_region(program.base, program.base + program.size): + print(FMT_MEM + "Cannot load program {} into desired space (0x{:0x}-0x{:0x}), area occupied.".format( + program.name, program.base, program.base + program.size + ) + FMT_NONE) + raise InvalidAllocationException("Area occupied".format( + program.name, program.base, program.base + program.size + ), program.name, program.size, MemoryFlags(False, True)) + + at_addr = program.base + else: + first_guaranteed_free_address = self.sections[-1].base + self.sections[-1].size + at_addr = align_addr(first_guaranteed_free_address, align_to) + + # trigger the load event to set all addresses in the binary + program.loaded_trigger(at_addr) + + # add program and sections to internal state + self.programs.append(program) + self.sections += program.sections + self._update_state() + + # load all global symbols from program + self.global_symbols.update( + {key: program.context.labels[key] for key in program.global_labels} + ) + # inject reference to global symbol table into program context + # FIXME: this is pretty unclean and should probably be solved in a better way in the future + program.context.global_symbol_dict = self.global_symbols + + def _update_state(self): + self.programs.sort(key=lambda bin: bin.base) + self.sections.sort(key=lambda sec: sec.base) + def __repr__(self): return "MMU(\n\t{}\n)".format( - "\n\t".join(repr(x) for x in self.sections) + "\n\t".join(repr(x) for x in self.programs) ) diff --git a/riscemu/__init__.py b/riscemu/__init__.py index 6319b86..90ba867 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -22,7 +22,7 @@ from .CPU import CPU from .config import RunConfig -from .parser import tokenize, parse_tokens, parse_program_from_file +from .parser import tokenize, parse_tokens, AssemblyFileLoader __author__ = "Anton Lydike " __copyright__ = "Copyright 2021 Anton Lydike" diff --git a/riscemu/assembler.py b/riscemu/assembler.py index 1ec9731..ec63833 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -2,14 +2,20 @@ from typing import Optional, Tuple, Union, List from enum import Enum, auto from typing import Optional, Tuple, Union -from .helpers import parse_numeric_argument, align_addr, int_to_bytes -from .base_types import Program, T_RelativeAddress, InstructionContext, Instruction +from .helpers import parse_numeric_argument, align_addr, int_to_bytes, get_section_base_name +from .types import Program, T_RelativeAddress, InstructionContext, Instruction from .colors import FMT_PARSE, FMT_NONE from .exceptions import ParseException, ASSERT_LEN, ASSERT_NOT_NULL from .tokenizer import Token -from .types import BinaryDataMemorySection, InstructionMemorySection +from .base import BinaryDataMemorySection, InstructionMemorySection INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini') +""" +A tuple containing all section names which contain executable code (instead of data) + +The first segment of each segment (first segment of ".text.main" is ".text") is checked +against this list to determine the type of it. +""" class MemorySectionType(Enum): @@ -64,17 +70,21 @@ class ParseContext: if self.section is None: return if self.section.type == MemorySectionType.Data: - section = BinaryDataMemorySection(self.section.data, self.section.name, self.context, self.program) + section = BinaryDataMemorySection( + self.section.data, self.section.name, self.context, self.program.name, self.section.base + ) self.program.add_section(section) elif self.section.type == MemorySectionType.Instructions: - section = InstructionMemorySection(self.section.data, self.section.name, self.context, self.program) + section = InstructionMemorySection( + self.section.data, self.section.name, self.context, self.program.name, self.section.base + ) self.program.add_section(section) self.section = None - def new_section(self, name: str, type: MemorySectionType): + def new_section(self, name: str, type: MemorySectionType, alignment: int = 4): base = 0 if self.section is not None: - base = align_addr(self.section.current_address(), 4) + base = align_addr(self.section.current_address(), alignment) print("base at {}".format(base)) self._finalize_section() self.section = CurrentSection(name, type, base) @@ -94,10 +104,6 @@ def ASSERT_IN_SECTION_TYPE(context: ParseContext, type: MemorySectionType): ) -def get_section_base_name(section_name: str) -> str: - return '.' + section_name.split('.')[1] - - class AssemblerDirectives: """ This class represents a collection of all assembler directives as documented by diff --git a/riscemu/base.py b/riscemu/base.py new file mode 100644 index 0000000..3989266 --- /dev/null +++ b/riscemu/base.py @@ -0,0 +1,81 @@ +""" +This file contains a base implementation of Instruction, and MemorySection. + +This aims to be a simple base, usable for everyone who needs the basic functionality, but doesn't +want to set up their own subtypes of Instruction and MemorySection +""" + +from typing import List, Tuple +from .exceptions import MemoryAccessException +from .helpers import parse_numeric_argument +from .types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \ + T_AbsoluteAddress, Program + + +class SimpleInstruction(Instruction): + def __init__(self, name: str, args: Tuple[str], context: InstructionContext, addr: T_RelativeAddress): + self.context = context + self.name = name + self.args = args + self.addr = addr + + def get_imm(self, num: int) -> int: + resolved_label = self.context.resolve_label(self.args[num], self.addr) + if resolved_label is None: + return parse_numeric_argument(self.args[num]) + return resolved_label + + def get_imm_reg(self, num: int) -> Tuple[int, str]: + return self.get_imm(num + 1), self.get_reg(num) + + def get_reg(self, num: int) -> str: + return self.args[num] + + +class InstructionMemorySection(MemorySection): + def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0): + self.name = name + self.base = base + self.context = context + self.size = len(instructions) * 4 + self.flags = MemoryFlags(True, True) + self.instructions = instructions + self.owner = owner + + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read') + + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write') + + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + if offset % 4 != 0: + raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch') + return self.instructions[offset // 4] + + +class BinaryDataMemorySection(MemorySection): + def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0, flags: MemoryFlags = None): + self.name = name + self.base = base + self.context = context + self.size = len(data) + self.flags = flags if flags is not None else MemoryFlags(False, False) + self.data = data + self.owner = owner + + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + if offset + size > self.size: + raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read') + return self.data[offset:offset + size] + + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + if offset + size > self.size: + raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write') + if len(data[0:size]) != size: + raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write') + self.data[offset:offset + size] = data[0:size] + + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self), + offset, 4, 'instruction fetch') diff --git a/riscemu/base_types.py b/riscemu/base_types.py deleted file mode 100644 index 43dfb73..0000000 --- a/riscemu/base_types.py +++ /dev/null @@ -1,188 +0,0 @@ -""" -RiscEmu (c) 2021 Anton Lydike - -SPDX-License-Identifier: MIT - -This file contains base classes which represent loaded programs -""" - -import re -from abc import ABC, abstractmethod -from collections import defaultdict -from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple, Set - -from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE -from .exceptions import ParseException -from .helpers import format_bytes - -T_RelativeAddress = int -T_AbsoluteAddress = int - -NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$') - - -@dataclass(frozen=True) -class MemoryFlags: - read_only: bool - executable: bool - - def __repr__(self): - return "{}({},{})".format( - self.__class__.__name__, - 'ro' if self.read_only else 'rw', - 'x' if self.executable else '-' - ) - - -class InstructionContext: - base_address: T_AbsoluteAddress - """ - The address where the instruction block is placed - """ - - labels: Dict[str, T_RelativeAddress] - """ - This dictionary maps all labels to their relative position of the instruction block - """ - numbered_labels: Dict[str, List[T_RelativeAddress]] - """ - This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where - the label was placed - """ - - def __init__(self): - self.labels = dict() - self.numbered_labels = defaultdict(list) - self.base_address = 0 - - def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_RelativeAddress]: - if NUMBER_SYMBOL_PATTERN.match(symbol): - if address_at is None: - raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol)) - - direction = symbol[-1] - if direction == 'b': - return max([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr < address_at], - default=None) - else: - return min([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr > address_at], - default=None) - else: - return self.labels.get(symbol, None) - - -class Instruction(ABC): - name: str - args: tuple - - @abstractmethod - def get_imm(self, num: int) -> int: - """ - parse and get immediate argument - """ - pass - - @abstractmethod - def get_imm_reg(self, num: int) -> Tuple[int, str]: - """ - parse and get an argument imm(reg) - """ - pass - - @abstractmethod - def get_reg(self, num: int) -> str: - """ - parse and get an register argument - """ - pass - - def __repr__(self): - return "{} {}".format(self.name, ", ".join(self.args)) - - -@dataclass -class MemorySection(ABC): - name: str - flags: MemoryFlags - size: int - base: T_AbsoluteAddress - owner: str - context: InstructionContext - - @abstractmethod - def read(self, offset: T_RelativeAddress, size: int) -> bytearray: - pass - - @abstractmethod - def write(self, offset: T_RelativeAddress, size: int, data: bytearray): - pass - - @abstractmethod - def read_ins(self, offset: T_RelativeAddress) -> Instruction: - pass - - def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress], fmt: str = 'hex', - bytes_per_row: int = 16, rows: int = 10, group: int = 4): - if self.flags.executable: - bytes_per_row = 4 - highlight = None - if end is None: - end = start + (bytes_per_row * (rows // 2)) - highlight = start - start = start - (bytes_per_row * (rows // 2)) - if self.flags.executable: - print(FMT_MEM + "{}, viewing {} instructions:".format( - self, (end - start) // 4 - ) + FMT_NONE) - - for addr in range(start, end, 4): - if addr == highlight: - print(FMT_UNDERLINE + FMT_ORANGE, end='') - print("0x{:x}: {}{}".format( - self.base + addr, self.read_ins(addr), FMT_NONE - )) - else: - print(FMT_MEM + "{}, viewing {} bytes:".format( - self, (end - start) - ) + FMT_NONE) - - for addr in range(start, end, bytes_per_row): - hi_ind = (highlight - addr) // group - print("0x{:x}: {}{}".format( - self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE - )) - - def __repr__(self): - return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format( - self.__class__.__name__, - self.name, - self.base, - self.size, - self.flags, - self.owner - ) - - -class Program: - name: str - context: InstructionContext - global_labels: Set[str] - sections: List[MemorySection] - base: T_AbsoluteAddress = 0 - - def __init__(self, name: str, base: int = 0): - self.name = name - self.context = InstructionContext() - self.sections = [] - self.base = base - self.global_labels = set() - - def add_section(self, sec: MemorySection): - self.sections.append(sec) - - def __repr__(self): - return "{}(name={},context={},globals={},sections={},base={})".format( - self.__class__.__name__, self.name, self.context, self.global_labels, - [s.name for s in self.sections], self.base - ) diff --git a/riscemu/debug.py b/riscemu/debug.py index 8350526..c89d686 100644 --- a/riscemu/debug.py +++ b/riscemu/debug.py @@ -7,7 +7,7 @@ SPDX-License-Identifier: MIT import typing from .registers import Registers from .colors import FMT_DEBUG, FMT_NONE -from .base_types import Instruction +from .types import Instruction from .helpers import * if typing.TYPE_CHECKING: diff --git a/riscemu/exceptions.py b/riscemu/exceptions.py index fd6f130..e9291ee 100644 --- a/riscemu/exceptions.py +++ b/riscemu/exceptions.py @@ -5,8 +5,11 @@ SPDX-License-Identifier: MIT """ from abc import abstractmethod -from .base_types import Instruction from .colors import * +import typing + +if typing.TYPE_CHECKING: + from .types import Instruction class RiscemuBaseException(BaseException): @@ -112,7 +115,7 @@ class InvalidAllocationException(RiscemuBaseException): class UnimplementedInstruction(RiscemuBaseException): - def __init__(self, ins: Instruction): + def __init__(self, ins: 'Instruction'): self.ins = ins def message(self): diff --git a/riscemu/helpers.py b/riscemu/helpers.py index 9946b5d..bbec01f 100644 --- a/riscemu/helpers.py +++ b/riscemu/helpers.py @@ -139,3 +139,9 @@ class Peekable(Generic[T], Iterator[T]): def is_empty(self) -> bool: return self.peek() is None + + +def get_section_base_name(section_name: str) -> str: + if '.' not in section_name: + print(FMT_PARSE + f"Invalid section {section_name}, not starting with a dot!" + FMT_NONE) + return '.' + section_name.split('.')[1] diff --git a/riscemu/instructions/InstructionSet.py b/riscemu/instructions/InstructionSet.py index b6a19b7..8b277c6 100644 --- a/riscemu/instructions/InstructionSet.py +++ b/riscemu/instructions/InstructionSet.py @@ -10,7 +10,7 @@ from abc import ABC from ..CPU import CPU from ..helpers import to_unsigned from ..exceptions import ASSERT_LEN, ASSERT_IN -from ..base_types import Instruction +from ..types import Instruction class InstructionSet(ABC): diff --git a/riscemu/instructions/RV32I.py b/riscemu/instructions/RV32I.py index 5a30b5f..e3db8a3 100644 --- a/riscemu/instructions/RV32I.py +++ b/riscemu/instructions/RV32I.py @@ -11,7 +11,7 @@ from ..colors import FMT_DEBUG, FMT_NONE from ..debug import launch_debug_session from ..exceptions import LaunchDebuggerException from ..syscall import Syscall -from ..base_types import Instruction +from ..types import Instruction class RV32I(InstructionSet): diff --git a/riscemu/parser.py b/riscemu/parser.py index 7186d99..f1e94f6 100644 --- a/riscemu/parser.py +++ b/riscemu/parser.py @@ -5,15 +5,15 @@ SPDX-License-Identifier: MIT """ import os import re -from typing import Dict, Tuple, Iterable, Callable +from typing import Dict, Tuple, Iterable, Callable, List from .helpers import Peekable from .assembler import MemorySectionType, ParseContext, AssemblerDirectives -from .base_types import Program +from .types import Program, T_ParserOpts, ProgramLoader from .colors import FMT_PARSE from .exceptions import ParseException from .tokenizer import Token, TokenType, tokenize -from .types import SimpleInstruction +from .base import SimpleInstruction def parse_instruction(token: Token, args: Tuple[str], context: ParseContext): @@ -53,7 +53,6 @@ def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program: for token, args in composite_tokenizer(Peekable[Token](tokens_iter)): if token.type not in PARSERS: raise ParseException("Unexpected token type: {}, {}".format(token, args)) - print("{} {}".format(token, args)) PARSERS[token.type](token, args, context) return context.finalize() @@ -92,9 +91,37 @@ def take_arguments(tokens: Peekable[Token]) -> Iterable[str]: next(tokens) break break - #raise ParseException("Expected newline, instead got {}".format(tokens.peek())) + # raise ParseException("Expected newline, instead got {}".format(tokens.peek())) -def parse_program_from_file(path: str) -> Program: - with open(path, 'r') as f: - return parse_tokens(os.path.split(path)[-1], tokenize(f)) +class AssemblyFileLoader(ProgramLoader): + """ + This class loads assembly files written by hand. It understands some assembler directives and supports most + pseudo instructions. It does very little verification of source correctness. + + It also supports numbered jump targets and properly supports local and global scope (.globl assembly directive) + + + The AssemblyFileLoader loads .asm, .S and .s files by default, and acts as a weak fallback to all other filetypes. + """ + def parse(self) -> Program: + with open(self.source_path, 'r') as f: + return parse_tokens(self.filename, tokenize(f)) + + @classmethod + def can_parse(cls, source_path: str) -> float: + """ + + It also acts as a weak fallback if no other loaders want to take the file. + + :param source_path: the path to the source file + :return: + """ + # gcc recognizes these line endings as assembly. So we will do too. + if source_path.split('.')[-1] in ('asm', 'S', 's'): + return 1 + return 0.01 + + @classmethod + def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]: + return argv, {} diff --git a/riscemu/priv/ElfLoader.py b/riscemu/priv/ElfLoader.py index 3a4bc33..48fab49 100644 --- a/riscemu/priv/ElfLoader.py +++ b/riscemu/priv/ElfLoader.py @@ -1,11 +1,9 @@ -from dataclasses import dataclass -from typing import List, Dict, Tuple +from typing import List from .Exceptions import * -from ..exceptions import RiscemuBaseException -from ..base_types import MemoryFlags, LoadedMemorySection -from ..decoder import decode, RISCV_REGS, format_ins +from .types import ElfMemorySection from ..helpers import FMT_PARSE, FMT_NONE, FMT_GREEN, FMT_BOLD +from ..types import MemoryFlags, Program, ProgramLoader, T_ParserOpts FMT_ELF = FMT_GREEN + FMT_BOLD @@ -13,41 +11,53 @@ if typing.TYPE_CHECKING: from elftools.elf.elffile import ELFFile from elftools.elf.sections import Section, SymbolTableSection -# This requires pyelftools package! - INCLUDE_SEC = ('.text', '.stack', '.bss', '.sdata', '.sbss') -class ElfExecutable: - sections: List['ElfLoadedMemorySection'] - sections_by_name: Dict[str, 'ElfLoadedMemorySection'] - symbols: Dict[str, int] - run_ptr: int +class ElfBinaryFileLoader(ProgramLoader): + """ + Loads compiled elf binaries (checks for the magic sequence 7f45 4c46) + + This loader respects local and global symbols. + """ + program: Program - def __init__(self, name: str): - self.sections = list() - self.sections_by_name = dict() - self.symbols = dict() + def __init__(self, source_path: str, options: T_ParserOpts): + super().__init__(source_path, options) + self.program = Program(self.filename) + @classmethod + def can_parse(cls, source_path: str) -> float: + with open(source_path, 'rb') as f: + if f.read(4) == b'\x7f\x45\x4c\x46': + return 1 + return 0 + + @classmethod + def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]: + return argv, {} + + def parse(self) -> Program: try: from elftools.elf.elffile import ELFFile from elftools.elf.sections import Section, SymbolTableSection - with open(name, 'rb') as f: - print(FMT_ELF + "[ElfLoader] Loading elf executable from: {}".format(name) + FMT_NONE) + with open(self.source_path, 'rb') as f: + print(FMT_ELF + "[ElfLoader] Loading elf executable from: {}".format(self.source_path) + FMT_NONE) self._read_elf(ELFFile(f)) except ImportError as e: - print(FMT_PARSE + "[ElfLoader] Cannot load elf files without PyElfTools package! You can install them using pip install pyelftools!" + FMT_NONE) + print(FMT_PARSE + "[ElfLoader] Cannot load elf files without PyElfTools package! You can install them " + "using pip install pyelftools!" + FMT_NONE) raise e + return self.program + def _read_elf(self, elf: 'ELFFile'): if not elf.header.e_machine == 'EM_RISCV': raise InvalidElfException("Not a RISC-V elf file!") if not elf.header.e_ident.EI_CLASS == 'ELFCLASS32': raise InvalidElfException("Only 32bit executables are supported!") - self.run_ptr = elf.header.e_entry - from elftools.elf.sections import SymbolTableSection for sec in elf.iter_sections(): if isinstance(sec, SymbolTableSection): @@ -57,29 +67,31 @@ class ElfExecutable: if sec.name not in INCLUDE_SEC: continue - self.add_sec(self._lms_from_elf_sec(sec, 'kernel')) + self._add_sec(self._lms_from_elf_sec(sec, self.filename)) def _lms_from_elf_sec(self, sec: 'Section', owner: str): is_code = sec.name in ('.text',) data = bytearray(sec.data()) + if len(data) < sec.data_size: + data += bytearray(len(data) - sec.data_size) flags = MemoryFlags(is_code, is_code) print(FMT_ELF + "[ElfLoader] Section {} at: {:X}".format(sec.name, sec.header.sh_addr) + FMT_NONE) - return ElfLoadedMemorySection( - sec.name, - sec.header.sh_addr, - sec.data_size, - data, - flags, - owner + return ElfMemorySection( + data, sec.name, self.program.context, owner, sec.header.sh_addr, flags ) def _parse_symtab(self, symtab: 'SymbolTableSection'): - self.symbols = { - sym.name: sym.entry.st_value for sym in symtab.iter_symbols() if sym.name - } - - def add_sec(self, new_sec: 'ElfLoadedMemorySection'): - for sec in self.sections: + for sym in symtab.iter_symbols(): + if not sym.name: + continue + self.program.context.labels[sym.name] = sym.entry.st_value + # check if it has st_visibility bit set + if sym.entry.st_shndx == 1: # STB_GLOBAL = 1 + self.program.global_labels.add(sym.name) + print(FMT_PARSE + "LOADED GLOBAL SYMBOL {}: {}".format(sym.name, sym.entry.st_value) + FMT_NONE) + + def _add_sec(self, new_sec: 'ElfMemorySection'): + for sec in self.program.sections: if sec.base < sec.end <= new_sec.base or sec.end > sec.base >= new_sec.end: continue else: @@ -88,78 +100,4 @@ class ElfExecutable: ) + FMT_NONE) raise RuntimeError("Cannot load elf with overlapping sections!") - self.sections.append(new_sec) - self.sections_by_name[new_sec.name] = new_sec - - -class InvalidElfException(RiscemuBaseException): - def __init__(self, msg: str): - super().__init__() - self.msg = msg - - def message(self): - return FMT_PARSE + "{}(\"{}\")".format(self.__class__.__name__, self.msg) + FMT_NONE - - -@dataclass(frozen=True) -class ElfInstruction: - name: str - args: List[int] - encoded: int - - def get_imm(self, num: int) -> int: - return self.args[num] - - def get_imm_reg(self, num: int) -> Tuple[int, int]: - return self.args[-1], self.args[-2] - - def get_reg(self, num: int) -> str: - return RISCV_REGS[self.args[num]] - - def __repr__(self) -> str: - if self.name == 'jal' and self.args[0] == 0: - return "j {}".format(self.args[1]) - if self.name == 'addi' and self.args[2] == 0: - return "mv {}, {}".format(self.get_reg(0), self.get_reg(1)) - if self.name == 'addi' and self.args[1] == 0: - return "li {}, {}".format(self.get_reg(0), self.args[2]) - if self.name == 'ret' and len(self.args) == 0: - return "ret" - return format_ins(self.encoded, self.name) - # if self.name in ('lw', 'lh', 'lb', 'lbu', 'lhu', 'sw', 'sh', 'sb'): - # args = "{}, {}({})".format( - # RISCV_REGS[self.args[0]], self.args[2], RISCV_REGS[self.args[1]] - # ) - # else: - # args = ", ".join(map(str, self.args)) - # return "{:<8} {}".format( - # self.name, - # args - # ) - - -class ElfLoadedMemorySection(LoadedMemorySection): - ins_cache: List[Optional[ElfInstruction]] - """ - A fast cache for accessing pre-decoded instructions - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.__setattr__('ins_cache', [None] * (self.size // 4)) - - def read_instruction(self, offset): - if self.ins_cache[offset//4] is not None: - return self.ins_cache[offset//4] - if not self.flags.executable: - print(FMT_PARSE + "Reading instruction from non-executable memory!" + FMT_NONE) - raise InstructionAccessFault(offset + self.base) - if offset % 4 != 0: - raise InstructionAddressMisalignedTrap(offset + self.base) - ins = ElfInstruction(*decode(self.content[offset:offset + 4])) - self.ins_cache[offset // 4] = ins - return ins - - @property - def end(self): - return self.size + self.base + self.program.add_section(new_sec) diff --git a/riscemu/priv/Exceptions.py b/riscemu/priv/Exceptions.py index fee6217..01e863f 100644 --- a/riscemu/priv/Exceptions.py +++ b/riscemu/priv/Exceptions.py @@ -5,6 +5,9 @@ from .CSRConsts import MCAUSE_TRANSLATION import typing +from .. import RiscemuBaseException +from ..colors import FMT_PARSE, FMT_NONE + if typing.TYPE_CHECKING: from .ElfLoader import ElfInstruction @@ -52,14 +55,17 @@ class CpuTrap(BaseException): def mcause(self): return (self.interrupt << 31) + self.code + def message(self) -> str: + return "" + def __repr__(self): name = "Reserved interrupt({}, {})".format(self.interrupt, self.code) if (self.interrupt, self.code) in MCAUSE_TRANSLATION: name = MCAUSE_TRANSLATION[(self.interrupt, self.code)] + "({}, {})".format(self.interrupt, self.code) - return "{} {{priv={}, type={}, mtval={:x}}}".format( - name, self.priv.name, self.type.name, self.mtval + return "{} {{priv={}, type={}, mtval={:x}}} {}".format( + name, self.priv.name, self.type.name, self.mtval, self.message() ) def __str__(self): @@ -89,3 +95,29 @@ class TimerInterrupt(CpuTrap): class EcallTrap(CpuTrap): def __init__(self, mode: PrivModes): super().__init__(mode.value + 8, 0, CpuTrapType.EXCEPTION) + + +class InvalidElfException(RiscemuBaseException): + def __init__(self, msg: str): + super().__init__() + self.msg = msg + + def message(self): + return FMT_PARSE + "{}(\"{}\")".format(self.__class__.__name__, self.msg) + FMT_NONE + + +class LoadAccessFault(CpuTrap): + def __init__(self, msg, addr, size, op): + super(LoadAccessFault, self).__init__(5, addr, CpuTrapType.EXCEPTION) + self.msg = msg + self.addr = addr + self.size = size + self.op = op + + def message(self): + return "(During {} at 0x{:08x} of size {}: {})".format( + self.op, + self.addr, + self.size, + self.msg + ) diff --git a/riscemu/priv/ImageLoader.py b/riscemu/priv/ImageLoader.py index 1e89eee..b711568 100644 --- a/riscemu/priv/ImageLoader.py +++ b/riscemu/priv/ImageLoader.py @@ -2,124 +2,74 @@ Laods a memory image with debug information into memory """ -import json -from functools import lru_cache -from typing import Dict, List, Optional, TYPE_CHECKING - -from .ElfLoader import ElfInstruction, ElfLoadedMemorySection, InstructionAccessFault, InstructionAddressMisalignedTrap -from .PrivMMU import PrivMMU -from ..config import RunConfig -from ..base_types import LoadedMemorySection, MemoryFlags -from ..IO.IOModule import IOModule -from ..colors import FMT_ERROR, FMT_NONE, FMT_MEM -from ..decoder import decode - -if TYPE_CHECKING: - pass - - -class MemoryImageMMU(PrivMMU): - io: List[IOModule] - data: bytearray - io_start: int - debug_info: Dict[str, Dict[str, Dict[str, str]]] - - def __init__(self, file_name: str, io_start: int = 0xFF0000): - super(MemoryImageMMU, self).__init__(conf=RunConfig()) - - with open(file_name, 'rb') as memf: - data = memf.read() - with open(file_name + '.dbg', 'r') as dbgf: - debug_info: Dict = json.load(dbgf) - - self.data = bytearray(data) - # TODO: super wasteful memory allocation happening here - if len(data) < io_start: - self.data += bytearray(io_start - len(data)) - self.debug_info = debug_info - self.io_start = io_start - self.io = list() - - def get_entrypoint(self): - try: - start = self.debug_info['symbols']['kernel'].get('_start', None) - if start is not None: - return start - return self.debug_info['symbols']['kernel'].get('_ftext') - except KeyError: - print(FMT_ERROR + '[MMU] cannot find kernel entry in debug information! Falling back to 0x100' + FMT_NONE) - return 0x100 - - @lru_cache(maxsize=2048) - def read_ins(self, addr: int) -> ElfInstruction: - if addr >= self.io_start: - raise InstructionAccessFault(addr) - if addr % 4 != 0: - raise InstructionAddressMisalignedTrap(addr) - - return ElfInstruction(*decode(self.data[addr:addr + 4])) - - def read(self, addr: int, size: int) -> bytearray: - if addr < 0x100: - pc = self.cpu.pc - text_sec = self.get_sec_containing(pc) - print(FMT_ERROR + "[MMU] possible null dereference (read {:x}) from (pc={:x},sec={},rel={:x})".format( - addr, pc, text_sec.owner + ':' + text_sec.name, pc - text_sec.base - ) + FMT_NONE) - if addr >= self.io_start: - return self.io_at(addr).read(addr, size) - return self.data[addr: addr + size] - - def write(self, addr: int, size: int, data): - if addr < 0x100: - pc = self.cpu.pc - text_sec = self.get_sec_containing(pc) - print(FMT_ERROR + "[MMU] possible null dereference (write {:x}) from (pc={:x},sec={},rel={:x})".format( - addr, pc, text_sec.owner + ':' + text_sec.name, pc - text_sec.base - ) + FMT_NONE) - - if addr >= self.io_start: - return self.io_at(addr).write(addr, data, size) - self.data[addr:addr + size] = data[0:size] - - def io_at(self, addr) -> IOModule: - for mod in self.io: - if mod.contains(addr): - return mod - raise InstructionAccessFault(addr) - - def add_io(self, io: IOModule): - self.io.append(io) - - def __repr__(self): - return "MemoryImageMMU()" - - @lru_cache(maxsize=32) - def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]: - next_sec = len(self.data) - for sec_addr, name in reversed(self.debug_info['sections'].items()): - if addr >= int(sec_addr): - owner, name = name.split(':') - base = int(sec_addr) - size = next_sec - base - flags = MemoryFlags('.text' in name, '.text' in name) - return ElfLoadedMemorySection(name, base, size, self.data[base:next_sec], flags, owner) - else: - next_sec = int(sec_addr) - - def translate_address(self, addr: int): - sec = self.get_sec_containing(addr) - if sec.name == '.empty': - return "" - symbs = self.debug_info['symbols'][sec.owner] - for sym, val in reversed(symbs.items()): - if addr >= val: - return "{}{:+x} ({}:{})".format(sym, addr - val, sec.owner, sec.name) - return "{}:{}{:+x}".format(sec.owner, sec.name, addr - sec.base) - - def label(self, symb: str): - print(FMT_MEM + "Looking up symbol {}".format(symb)) - for owner, symbs in self.debug_info['symbols'].items(): - if symb in symbs: - print(" Hit in {}: {} = {}".format(owner, symb, symbs[symb])) - print(FMT_NONE, end="") +import os.path +from typing import List, Iterable + +from .ElfLoader import ElfMemorySection +from .types import MemoryImageDebugInfos +from ..assembler import INSTRUCTION_SECTION_NAMES +from ..colors import FMT_NONE, FMT_PARSE +from ..helpers import get_section_base_name +from ..types import MemoryFlags, ProgramLoader, Program, T_ParserOpts + + +class MemoryImageLoader(ProgramLoader): + + @classmethod + def can_parse(cls, source_path: str) -> float: + if source_path.split('.')[-1] == '.img': + return 1 + return 0 + + @classmethod + def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]: + return argv, {} + + def parse(self) -> Iterable[Program]: + if self.options.get('debug', False): + yield self.parse_no_debug() + return + + with open(self.options.get('debug'), 'r') as debug_file: + debug_info = MemoryImageDebugInfos.load(debug_file.read()) + + with open(self.source_path, 'rb') as source_file: + data: bytearray = bytearray(source_file.read()) + + for name, sections in debug_info.sections.items(): + program = Program(name) + + for sec_name, (start, size) in sections.items(): + if program.base is None: + program.base = start + + in_code_sec = get_section_base_name(sec_name) in INSTRUCTION_SECTION_NAMES + program.add_section( + ElfMemorySection( + data[start:start+size], sec_name, program.context, + name, start, MemoryFlags(in_code_sec, in_code_sec) + ) + ) + + program.context.labels.update(debug_info.symbols.get(name, dict())) + program.global_labels.update(debug_info.globals.get(name, set())) + + yield program + + def parse_no_debug(self) -> Program: + print(FMT_PARSE + "[MemoryImageLoader] Warning: loading memory image without debug information!" + FMT_NONE) + + with open(self.source_path, 'rb') as source_file: + data: bytes = source_file.read() + + p = Program(self.filename) + p.add_section(ElfMemorySection( + bytearray(data), 'memory image contents', p.context, p.name, 0, MemoryFlags(False, True) + )) + return p + + @classmethod + def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader': + if os.path.exists(source_path + '.dbg'): + return MemoryImageLoader(source_path, dict(**options, debug=source_path + '.dbg')) + return MemoryImageLoader(source_path, options) diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index 5297060..9bb5fec 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -15,7 +15,7 @@ from ..IO import TextIO from ..instructions import RV32A, RV32M if typing.TYPE_CHECKING: - from riscemu import base_types, LoadedExecutable, LoadedInstruction + from riscemu import types, LoadedExecutable, LoadedInstruction from riscemu.instructions.InstructionSet import InstructionSet @@ -25,7 +25,7 @@ class PrivCPU(CPU): It should support M and U Mode, but no U-Mode Traps. - This allows us to + This is meant to emulate whole operating systems. """ csr: CSR @@ -44,17 +44,11 @@ class PrivCPU(CPU): the equivalent of "1 byte" (this is actually impossible) """ - def __init__(self, conf, mmu: PrivMMU): + def __init__(self, conf): super().__init__(conf, [PrivRV32I, RV32M, RV32A]) + # start in machine mode self.mode: PrivModes = PrivModes.MACHINE - mmu.set_cpu(self) - self.pc = mmu.get_entrypoint() - self.mmu = mmu - - if hasattr(self.mmu, 'add_io'): - self.mmu.add_io(TextIO.TextIO(0xff0000, 64)) - self.syscall_int = None self.launch_debug = False self.pending_traps: List[CpuTrap] = list() diff --git a/riscemu/priv/PrivRV32I.py b/riscemu/priv/PrivRV32I.py index 278767d..81f446a 100644 --- a/riscemu/priv/PrivRV32I.py +++ b/riscemu/priv/PrivRV32I.py @@ -21,7 +21,7 @@ class PrivRV32I(RV32I): This is an extension of RV32I, written for the PrivCPU class """ - def instruction_csrrw(self, ins: 'LoadedInstruction'): + def instruction_csrrw(self, ins: 'Instruction'): rd, rs, csr_addr = self.parse_crs_ins(ins) old_val = None if rd != 'zero': @@ -34,7 +34,7 @@ class PrivRV32I(RV32I): if old_val is not None: self.regs.set(rd, old_val) - def instruction_csrrs(self, ins: 'LoadedInstruction'): + def instruction_csrrs(self, ins: 'Instruction'): rd, rs, csr_addr = self.parse_crs_ins(ins) if rs != 'zero': # oh no, this should not happen! @@ -45,13 +45,13 @@ class PrivRV32I(RV32I): self.regs.set(rd, old_val) - def instruction_csrrc(self, ins: 'LoadedInstruction'): + def instruction_csrrc(self, ins: 'Instruction'): INS_NOT_IMPLEMENTED(ins) - def instruction_csrrsi(self, ins: 'LoadedInstruction'): + def instruction_csrrsi(self, ins: 'Instruction'): INS_NOT_IMPLEMENTED(ins) - def instruction_csrrwi(self, ins: 'LoadedInstruction'): + def instruction_csrrwi(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) rd, imm, addr = ins.get_reg(0), ins.get_imm(1), ins.get_imm(2) if rd != 'zero': @@ -62,10 +62,10 @@ class PrivRV32I(RV32I): self.cpu.csr.set(addr, imm) - def instruction_csrrci(self, ins: 'LoadedInstruction'): + def instruction_csrrci(self, ins: 'Instruction'): INS_NOT_IMPLEMENTED(ins) - def instruction_mret(self, ins: 'LoadedInstruction'): + def instruction_mret(self, ins: 'Instruction'): if self.cpu.mode != PrivModes.MACHINE: print("MRET not inside machine level code!") raise IllegalInstructionTrap(ins) @@ -90,53 +90,53 @@ class PrivRV32I(RV32I): if self.cpu.conf.verbosity > 1: self.regs.dump_reg_a() - def instruction_uret(self, ins: 'LoadedInstruction'): + def instruction_uret(self, ins: 'Instruction'): raise IllegalInstructionTrap(ins) - def instruction_sret(self, ins: 'LoadedInstruction'): + def instruction_sret(self, ins: 'Instruction'): raise IllegalInstructionTrap(ins) - def instruction_scall(self, ins: 'LoadedInstruction'): + def instruction_scall(self, ins: 'Instruction'): """ Overwrite the scall from userspace RV32I """ raise EcallTrap(self.cpu.mode) - def instruction_beq(self, ins: 'LoadedInstruction'): + def instruction_beq(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 == rs2: self.pc += dst - 4 - def instruction_bne(self, ins: 'LoadedInstruction'): + def instruction_bne(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 != rs2: self.pc += dst - 4 - def instruction_blt(self, ins: 'LoadedInstruction'): + def instruction_blt(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 < rs2: self.pc += dst - 4 - def instruction_bge(self, ins: 'LoadedInstruction'): + def instruction_bge(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 >= rs2: self.pc += dst - 4 - def instruction_bltu(self, ins: 'LoadedInstruction'): + def instruction_bltu(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False) if rs1 < rs2: self.pc += dst - 4 - def instruction_bgeu(self, ins: 'LoadedInstruction'): + def instruction_bgeu(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False) if rs1 >= rs2: self.pc += dst - 4 # technically deprecated - def instruction_j(self, ins: 'LoadedInstruction'): + def instruction_j(self, ins: 'Instruction'): raise NotImplementedError("Should never be reached!") - def instruction_jal(self, ins: 'LoadedInstruction'): + def instruction_jal(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) addr = ins.get_imm(1) @@ -148,20 +148,20 @@ class PrivRV32I(RV32I): self.regs.set(reg, self.pc) self.pc += addr - 4 - def instruction_jalr(self, ins: 'LoadedInstruction'): + def instruction_jalr(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) rd, rs, imm = self.parse_rd_rs_imm(ins) self.regs.set(rd, self.pc) self.pc = rs + imm - 4 - def instruction_sbreak(self, ins: 'LoadedInstruction'): + def instruction_sbreak(self, ins: 'Instruction'): raise LaunchDebuggerException() - def parse_crs_ins(self, ins: 'LoadedInstruction'): + def parse_crs_ins(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) return ins.get_reg(0), ins.get_reg(1), ins.get_imm(2) - def parse_mem_ins(self, ins: 'LoadedInstruction') -> Tuple[str, int]: + def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, int]: ASSERT_LEN(ins.args, 3) addr = self.get_reg_content(ins, 1) + ins.get_imm(2) reg = ins.get_reg(0) diff --git a/riscemu/priv/types.py b/riscemu/priv/types.py new file mode 100644 index 0000000..585f580 --- /dev/null +++ b/riscemu/priv/types.py @@ -0,0 +1,140 @@ +import json +from collections import defaultdict +from dataclasses import dataclass +from functools import lru_cache +from typing import Tuple, Dict, Set + +from riscemu import MemoryAccessException +from riscemu.priv.Exceptions import InstructionAccessFault, InstructionAddressMisalignedTrap, LoadAccessFault +from riscemu.types import Instruction, InstructionContext, T_RelativeAddress, MemoryFlags, T_AbsoluteAddress +from riscemu.base import BinaryDataMemorySection +from riscemu.colors import FMT_NONE, FMT_PARSE +from riscemu.decoder import format_ins, RISCV_REGS, decode + + +@dataclass(frozen=True) +class ElfInstruction(Instruction): + name: str + args: Tuple[int] + encoded: int + + def get_imm(self, num: int) -> int: + return self.args[num] + + def get_imm_reg(self, num: int) -> Tuple[int, int]: + return self.args[-1], self.args[-2] + + def get_reg(self, num: int) -> str: + return RISCV_REGS[self.args[num]] + + def __repr__(self) -> str: + if self.name == 'jal' and self.args[0] == 0: + return "j {}".format(self.args[1]) + if self.name == 'addi' and self.args[2] == 0: + return "mv {}, {}".format(self.get_reg(0), self.get_reg(1)) + if self.name == 'addi' and self.args[1] == 0: + return "li {}, {}".format(self.get_reg(0), self.args[2]) + if self.name == 'ret' and len(self.args) == 0: + return "ret" + return format_ins(self.encoded, self.name) + + +class ElfMemorySection(BinaryDataMemorySection): + def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int, + flags: MemoryFlags): + super().__init__(data, name, context, owner, base=base, flags=flags) + + @lru_cache + def read_ins(self, offset): + if not self.flags.executable: + print(FMT_PARSE + "Reading instruction from non-executable memory!" + FMT_NONE) + raise InstructionAccessFault(offset + self.base) + if offset % 4 != 0: + raise InstructionAddressMisalignedTrap(offset + self.base) + return ElfInstruction(*decode(self.data[offset:offset + 4])) + + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + if self.flags.read_only: + raise LoadAccessFault('read-only section', offset + self.base, size, 'write') + self.read_ins.cache_clear() + return super(ElfMemorySection, self).write(offset, size, data) + + @property + def end(self): + return self.size + self.base + + +class MemoryImageDebugInfos: + VERSION = '1' + """ + Schema version + """ + + base: T_AbsoluteAddress = 0 + """ + The base address where the image starts. Defaults to zero. + """ + + sections: Dict[str, Dict[str, Tuple[int, int]]] + """ + This dictionary maps a program and section to (start address, section length) + """ + + symbols: Dict[str, Dict[str, int]] + """ + This dictionary maps a program and a symbol to a value + """ + + globals: Dict[str, Set[str]] + """ + This dictionary contains the list of all global symbols of a given program + """ + + def __init__(self, + sections: Dict[str, Dict[str, Tuple[int, int]]], + symbols: Dict[str, Dict[str, int]], + globals: Dict[str, Set[str]], + base: int = 0 + ): + self.sections = sections + self.symbols = symbols + self.globals = globals + self.base = base + + def serialize(self) -> str: + def serialize(obj: any) -> str: + if isinstance(obj, defaultdict): + return json.dumps(dict(obj), default=serialize) + if isinstance(obj, (set, tuple)): + return json.dumps(list(obj), default=serialize) + return "<>".format(getattr(obj, '__qualname__', '{unknown}')) + + return json.dumps( + dict(sections=self.sections, symbols=self.symbols, globals=self.globals, base=self.base), + default=serialize + ) + + @classmethod + def load(cls, serialized_str: str) -> 'MemoryImageDebugInfos': + json_obj: dict = json.loads(serialized_str) + + if 'VERSION' not in json_obj: + raise RuntimeError("Unknown MemoryImageDebugInfo version!") + + version: str = json_obj.pop('VERSION') + + # compare major version + if version != cls.VERSION or version.split('.')[0] != cls.VERSION.split('.')[0]: + raise RuntimeError( + "Unknown MemoryImageDebugInfo version! This emulator expects version {}, debug info version {}".format( + cls.VERSION, version + ) + ) + + return MemoryImageDebugInfos(**json_obj) + + @classmethod + def builder(cls) -> 'MemoryImageDebugInfos': + return MemoryImageDebugInfos( + defaultdict(dict), defaultdict(dict), defaultdict(set) + ) diff --git a/riscemu/syscall.py b/riscemu/syscall.py index 0abfa0b..6f22177 100644 --- a/riscemu/syscall.py +++ b/riscemu/syscall.py @@ -18,9 +18,9 @@ if typing.TYPE_CHECKING: from . import CPU SYSCALLS = { - 63: 'read', - 64: 'write', - 93: 'exit', + 63: 'read', + 64: 'write', + 93: 'exit', 1024: 'open', 1025: 'close', } @@ -35,6 +35,7 @@ OPEN_MODES = { } """All available file open modes""" + @dataclass(frozen=True) class Syscall: """ @@ -199,4 +200,4 @@ class SyscallInterface: return "{}(\n\tfiles={}\n)".format( self.__class__.__name__, self.open_files - ) \ No newline at end of file + ) diff --git a/riscemu/tokenizer.py b/riscemu/tokenizer.py index 2820a09..35dcfe9 100644 --- a/riscemu/tokenizer.py +++ b/riscemu/tokenizer.py @@ -133,7 +133,3 @@ def split_whitespace_respecting_quotes(line: str) -> Iterable[str]: if part: yield part - - - - diff --git a/riscemu/types.py b/riscemu/types.py index 998eba1..c175e09 100644 --- a/riscemu/types.py +++ b/riscemu/types.py @@ -1,74 +1,412 @@ -from typing import List, Tuple -from .exceptions import MemoryAccessException -from .helpers import parse_numeric_argument -from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \ - T_AbsoluteAddress, Program +""" +RiscEmu (c) 2021 Anton Lydike +SPDX-License-Identifier: MIT + +This file contains abstract base classes and types, bundling only the absolute basic functionality + +See base.py for some basic implementations of these classes +""" +import os +import re +from abc import ABC, abstractmethod +from collections import defaultdict +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple, Set, Union, Generator, Iterator, Callable, Type + +from . import MMU, InstructionSet +from .assembler import get_section_base_name +from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_PARSE, FMT_RED, FMT_BOLD +from .exceptions import ParseException +from .helpers import format_bytes + +# define some base type aliases so we can keep track of absolute and relative addresses +T_RelativeAddress = int +T_AbsoluteAddress = int + +# parser options are just dictionaries with arbitrary values +T_ParserOpts = Dict[str, any] + +NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$') + + +@dataclass(frozen=True) +class MemoryFlags: + read_only: bool + executable: bool + + def __repr__(self): + return "r{}{}".format( + '-' if self.read_only else 'w', + 'x' if self.executable else '-' + ) + + +class InstructionContext: + base_address: T_AbsoluteAddress + """ + The address where the instruction block is placed + """ + + labels: Dict[str, T_RelativeAddress] + """ + This dictionary maps all labels to their relative position of the instruction block + """ + + numbered_labels: Dict[str, List[T_RelativeAddress]] + """ + This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where + the label was placed + """ + + global_symbol_dict: Dict[str, T_AbsoluteAddress] + """ + A reference to the MMU for access to global symbols + """ + + def __init__(self): + self.labels = dict() + self.numbered_labels = defaultdict(list) + self.base_address = 0 + self.global_symbol_dict = dict() + + def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]: + if NUMBER_SYMBOL_PATTERN.match(symbol): + if address_at is None: + raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol)) + + direction = symbol[-1] + if direction == 'b': + return max([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr < address_at], + default=None) + else: + return min([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr > address_at], + default=None) + else: + if symbol not in self.labels: + return self.global_symbol_dict.get(symbol, None) + value = self.labels.get(symbol, None) + if value is None: + return value + return value + self.base_address -class SimpleInstruction(Instruction): - def __init__(self, name: str, args: Tuple[str], context: InstructionContext, addr: T_RelativeAddress): - self.context = context - self.name = name - self.args = args - self.addr = addr +class Instruction(ABC): + name: str + args: tuple + + @abstractmethod def get_imm(self, num: int) -> int: - resolved_label = self.context.resolve_label(self.args[num], self.addr) - if resolved_label is None: - return parse_numeric_argument(self.args[num]) - return resolved_label + """ + parse and get immediate argument + """ + pass + @abstractmethod def get_imm_reg(self, num: int) -> Tuple[int, str]: - return self.get_imm(num + 1), self.get_reg(num) + """ + parse and get an argument imm(reg) + """ + pass + @abstractmethod def get_reg(self, num: int) -> str: - return self.args[num] + """ + parse and get an register argument + """ + pass + def __repr__(self): + return "{} {}".format(self.name, ", ".join(self.args)) -class InstructionMemorySection(MemorySection): - def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: Program, base: int = 0): - self.name = name - self.base = base - self.context = context - self.size = len(instructions) * 4 - self.flags = MemoryFlags(True, True) - self.instructions = instructions - self.owner = owner.name +@dataclass +class MemorySection(ABC): + name: str + flags: MemoryFlags + size: int + base: T_AbsoluteAddress + owner: str + context: InstructionContext + + @property + def end(self): + return self.base + self.size + + @abstractmethod def read(self, offset: T_RelativeAddress, size: int) -> bytearray: - raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read') + pass + @abstractmethod def write(self, offset: T_RelativeAddress, size: int, data: bytearray): - raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write') + pass + @abstractmethod def read_ins(self, offset: T_RelativeAddress) -> Instruction: - if offset % 4 != 0: - raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch') - return self.instructions[offset // 4] + pass + + def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex', + bytes_per_row: int = 16, rows: int = 10, group: int = 4): + if self.flags.executable: + bytes_per_row = 4 + highlight = None + if end is None: + end = min(start + (bytes_per_row * (rows // 2)), self.size - 1) + highlight = start + start = max(0, start - (bytes_per_row * (rows // 2))) + + if self.flags.executable: + print(FMT_MEM + "{}, viewing {} instructions:".format( + self, (end - start) // 4 + ) + FMT_NONE) + + for addr in range(start, end, 4): + if addr == highlight: + print(FMT_UNDERLINE + FMT_ORANGE, end='') + print("0x{:04x}: {}{}".format( + self.base + addr, self.read_ins(addr), FMT_NONE + )) + else: + print(FMT_MEM + "{}, viewing {} bytes:".format( + self, (end - start) + ) + FMT_NONE) + + aligned_end = end - (end % bytes_per_row) if end % bytes_per_row != 0 else end + + for addr in range(start, aligned_end, bytes_per_row): + hi_ind = (highlight - addr) // group if highlight is not None else -1 + print("0x{:04x}: {}{}".format( + self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE + )) + + if aligned_end != end: + hi_ind = (highlight - aligned_end) // group if highlight is not None else -1 + print("0x{:04x}: {}{}".format( + self.base + aligned_end, format_bytes( + self.read(aligned_end, end % bytes_per_row), fmt, group, hi_ind + ), FMT_NONE + )) + + def dump_all(self, *args, **kwargs): + self.dump(0, self.size, *args, **kwargs) + + def __repr__(self): + return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format( + self.__class__.__name__, + self.name, + self.base, + self.size, + self.flags, + self.owner + ) + + +class Program: + """ + This represents a collection of sections which together form an executable program + + When you want to create a program which can be located anywhere in memory, set base to None, + this signals the other components, that this is relocatable. Set the base of each section to + the offset in the program, and everything will be taken care of for you. + + """ + name: str + context: InstructionContext + global_labels: Set[str] + sections: List[MemorySection] + base: Optional[T_AbsoluteAddress] + is_loaded: bool + @property + def size(self): + if len(self.sections) == 0: + return 0 + if self.base is None: + return self.sections[-1].base + self.sections[-1].size + return (self.sections[-1].base - self.base) + self.sections[-1].size -class BinaryDataMemorySection(MemorySection): - def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: Program, base: int = 0): + def __init__(self, name: str, base: Optional[int] = None): self.name = name + self.context = InstructionContext() + self.sections = [] + self.global_labels = set() self.base = base - self.context = context - self.size = len(data) - self.flags = MemoryFlags(False, False) - self.data = data - self.owner = owner.name + self.loaded = False - def read(self, offset: T_RelativeAddress, size: int) -> bytearray: - if offset + size > self.size: - raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read') - return self.data[offset:offset + size] + def add_section(self, sec: MemorySection): + # print a warning when a section is located before the programs base + if self.base is not None: + if sec.base < self.base: + print(FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format( + sec, self.name, self.base + ) + FMT_NONE) - def write(self, offset: T_RelativeAddress, size: int, data: bytearray): - if offset + size > self.size: - raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write') - if len(data[0:size]) != size: - raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write') - self.data[offset:offset + size] = data[0:size] + self.sections.append(sec) + # keep section list ordered + self.sections.sort(key=lambda section: section.base) - def read_ins(self, offset: T_RelativeAddress) -> Instruction: - raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self), - offset, 4, 'instruction fetch') + def __repr__(self): + return "{}(name={},globals={},sections={},base={})".format( + self.__class__.__name__, self.name, self.global_labels, + [s.name for s in self.sections], self.base + ) + + @property + def entrypoint(self): + base = 0 if self.base is None else self.base + if '_start' in self.context.labels: + return base + self.context.labels.get('_start') + if 'main' in self.context.labels: + return base + self.context.labels.get('main') + for sec in self.sections: + if get_section_base_name(sec.name) == '.text' and sec.flags.executable: + return base + sec.base + + def loaded_trigger(self, at_addr: T_AbsoluteAddress): + """ + This trigger is called when the binary is loaded and its final address in memory is determined + + This will do a small sanity check to prevent programs loading twice, or at addresses they don't + expect to be loaded. + + :param at_addr: the address where the program will be located + """ + if self.is_loaded: + if at_addr != self.base: + raise RuntimeError("Program loaded twice at different addresses! This will probably break things!") + return + + if self.base is not None and self.base != at_addr: + print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, ' + 'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE) + + # if the program is not located anywhere explicitly in memory, add the program address + # to the defined section bases + if self.base is None: + for sec in self.sections: + sec.base += at_addr + + if self.base != at_addr: + # move sections so they are located where they want to be located + offset = at_addr - self.base + for sec in self.sections: + sec.base += offset + + self.base = at_addr + self.context.base_address = at_addr + + +class ProgramLoader(ABC): + """ + A program loader is always specific to a given source file. It is a place to store all state + concerning the parsing and loading of that specific source file, including options. + """ + + def __init__(self, source_path: str, options: T_ParserOpts): + self.source_path = source_path + self.options = options + self.filename = os.path.split(self.source_path)[-1] + + @classmethod + @abstractmethod + def can_parse(cls, source_path: str) -> float: + """ + Return confidence that the file located at source_path + should be parsed and loaded by this loader + :param source_path: the path of the source file + :return: the confidence that this file belongs to this parser + """ + pass + + @classmethod + @abstractmethod + def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]: + """ + parse command line args into an options dictionary + + :param argv: the command line args list + :return: all remaining command line args and the parser options object + """ + pass + + @classmethod + def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader': + """ + Instantiate a loader for the given source file with the required arguments + + :param source_path: the path to the source file + :param options: the parsed options (guaranteed to come from this classes get_options method. + :return: An instance of a ProgramLoader for the spcified source + """ + return cls(source_path, options) + + @abstractmethod + def parse(self) -> Union[Program, Iterator[Program]]: + """ + + :return: + """ + pass + + +class CPU(ABC): + # static cpu configuration + INS_XLEN: int = 4 + + # housekeeping variables + mmu: MMU + pc: T_AbsoluteAddress + cycle: int + halted: bool + + # debugging context + debugger_active: bool + + # instruction information + instructions: Dict[str, Callable[[Instruction], None]] + instruction_sets: Set[InstructionSet] + + def __init__(self, mmu: MMU, instruction_sets: List[Type[InstructionSet]]): + self.mmu = mmu + + self.instruction_sets = set() + self.instructions = dict() + + for set_class in instruction_sets: + ins_set = set_class(self) + self.instructions.update(ins_set.load()) + self.instruction_sets.add(ins_set) + + self.cycle = 0 + self.pc = 0 + self.debugger_active = False + + self.sections = list() + self.programs = list() + + def run_instruction(self, ins: Instruction): + """ + Execute a single instruction + + :param ins: The instruction to execute + """ + if ins.name in self.instructions: + self.instructions[ins.name](ins) + else: + # this should never be reached, as unknown instructions are imparseable + raise RuntimeError("Unknown instruction: {}".format(ins)) + + def load_program(self, program: Program): + self.mmu.load_program(program) + + def __repr__(self): + """ + Returns a representation of the CPU and some of its state. + """ + return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format( + self.__class__.__name__, + self.pc, + self.cycle, + self.halted, + " ".join(s.name for s in self.instruction_sets) + ) From b396e0c5ebc4c2263fea8e8afdbe38d74d3ec77d Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Fri, 11 Feb 2022 18:27:10 +0100 Subject: [PATCH 06/30] user mode emulator finally working again --- riscemu/CPU.py | 168 +++++++----------- riscemu/MMU.py | 34 +++- riscemu/__main__.py | 27 +-- riscemu/config.py | 3 +- riscemu/debug.py | 57 +++--- riscemu/exceptions.py | 11 +- riscemu/instructions/RV32A.py | 2 +- riscemu/instructions/RV32I.py | 30 ++-- riscemu/instructions/RV32M.py | 2 +- riscemu/instructions/__init__.py | 2 +- .../{InstructionSet.py => instruction_set.py} | 0 riscemu/parser.py | 11 +- riscemu/priv/PrivCPU.py | 4 +- riscemu/registers.py | 19 +- riscemu/syscall.py | 9 +- riscemu/tokenizer.py | 2 +- riscemu/types.py | 43 +++-- 17 files changed, 229 insertions(+), 195 deletions(-) rename riscemu/instructions/{InstructionSet.py => instruction_set.py} (100%) diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 2b72a66..2ac4548 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -1,142 +1,106 @@ """ -RiscEmu (c) 2021 Anton Lydike +RiscEmu (c) 2021-2022 Anton Lydike SPDX-License-Identifier: MIT -This file contains the CPU logic (not the individual instruction sets). See instructions/InstructionSet.py for more info +This file contains the CPU logic (not the individual instruction sets). See instructions/instruction_set.py for more info on them. """ -import sys -from typing import Tuple, List, Dict, Callable, Type +import typing +from typing import List, Type -from .types import MemoryFlags -from .syscall import SyscallInterface, get_syscall_symbols -from .exceptions import RiscemuBaseException, LaunchDebuggerException +import riscemu from .MMU import MMU -from .config import RunConfig -from .registers import Registers +from .base import BinaryDataMemorySection +from .colors import FMT_CPU, FMT_NONE from .debug import launch_debug_session -from .colors import FMT_CPU, FMT_NONE, FMT_ERROR - -import riscemu - -import typing +from .exceptions import RiscemuBaseException, LaunchDebuggerException +from .syscall import SyscallInterface, get_syscall_symbols +from .types import CPU if typing.TYPE_CHECKING: - from . import types, LoadedExecutable, LoadedInstruction - from .instructions.InstructionSet import InstructionSet + from .instructions.instruction_set import InstructionSet -class CPU: +class UserModeCPU(CPU): """ This class represents a single CPU. It holds references to it's mmu, registers and syscall interrupt handler. It is initialized with a configuration and a list of instruction sets. """ - INS_XLEN = 4 - - def __init__(self, conf: RunConfig, instruction_sets: List[Type['riscemu.InstructionSet']]): + def __init__(self, instruction_sets: List[Type['riscemu.InstructionSet']]): """ Creates a CPU instance. - :param conf: An instance of the current RunConfiguration :param instruction_sets: A list of instruction set classes. These must inherit from the InstructionSet class """ # setup CPU states - self.pc = 0 - self.cycle = 0 - self.exit: bool = False - self.exit_code: int = 0 - self.conf = conf - self.active_debug = False # if a debugging session is currently runnign - - self.stack: typing.Optional['riscemu.LoadedMemorySection'] = None - - # setup MMU, registers and syscall handlers - self.mmu = MMU(conf) - self.regs = Registers(conf) - self.syscall_int = SyscallInterface() - - # load all instruction sets - self.instruction_sets: List[riscemu.InstructionSet] = list() - self.instructions: Dict[str, Callable[[LoadedInstruction], None]] = dict() - for set_class in instruction_sets: - ins_set = set_class(self) - self.instructions.update(ins_set.load()) - self.instruction_sets.append(ins_set) + super().__init__(MMU(), instruction_sets) - # provide global syscall symbols if option is set - if conf.include_scall_symbols: - self.mmu.global_symbols.update(get_syscall_symbols()) + self.exit_code = 0 - def continue_from_debugger(self, verbose=True): - """ - called from the debugger to continue running + # setup syscall interface + self.syscall_int = SyscallInterface() - :param verbose: If True, will print each executed instruction to STDOUT - """ - self._run(verbose) + # add global syscall symbols, but don't overwrite any user-defined symbols + syscall_symbols = get_syscall_symbols() + syscall_symbols.update(self.mmu.global_symbols) + self.mmu.global_symbols.update(syscall_symbols) - def step(self): + def step(self, verbose=False): """ Execute a single instruction, then return. """ - if self.exit: + if self.halted: print(FMT_CPU + "[CPU] Program exited with code {}".format(self.exit_code) + FMT_NONE) - else: - try: - self.cycle += 1 - ins = self.mmu.read_ins(self.pc) - print(FMT_CPU + " Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins)) - self.pc += self.INS_XLEN - self.run_instruction(ins) - except LaunchDebuggerException: - print(FMT_CPU + "[CPU] Returning to debugger!" + FMT_NONE) - except RiscemuBaseException as ex: - self.pc -= self.INS_XLEN - print(ex.message()) + return + + launch_debugger = False - def _run(self, verbose=False): - if self.pc <= 0: - return False - ins = None try: - while not self.exit: - self.cycle += 1 - ins = self.mmu.read_ins(self.pc) - if verbose: - print(FMT_CPU + " Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins)) - self.pc += self.INS_XLEN - self.run_instruction(ins) + self.cycle += 1 + ins = self.mmu.read_ins(self.pc) + if verbose: + print(FMT_CPU + " Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins)) + self.pc += self.INS_XLEN + self.run_instruction(ins) except RiscemuBaseException as ex: - if not isinstance(ex, LaunchDebuggerException): - print(FMT_ERROR + "[CPU] excpetion caught at 0x{:08X}: {}:".format(self.pc - 1, ins) + FMT_NONE) + if isinstance(ex, LaunchDebuggerException): + # if the debugger is active, raise the exception to + if self.debugger_active: + raise ex + + print(FMT_CPU + '[CPU] Debugger launch requested!' + FMT_NONE) + launch_debugger = True + else: print(ex.message()) - self.pc -= self.INS_XLEN - - if self.active_debug: - print(FMT_CPU + "[CPU] Returning to debugger!" + FMT_NONE) - return - if self.conf.debug_on_exception: - launch_debug_session(self, self.mmu, self.regs, "Exception encountered, launching debug:") - - if self.exit: - print() - print(FMT_CPU + "Program exited with code {}".format(self.exit_code) + FMT_NONE) - sys.exit(self.exit_code) - else: - print() - print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE) - - def __repr__(self): + ex.print_stacktrace() + print(FMT_CPU + '[CPU] Halting due to exception!' + FMT_NONE) + self.halted = True + + if launch_debugger: + launch_debug_session(self) + + def run(self, verbose=False): + while not self.halted: + self.step(verbose) + + def setup_stack(self, stack_size=1024 * 4) -> bool: """ - Returns a representation of the CPU and some of its state. + Create program stack and populate stack pointer + :param stack_size: the size of the required stack, defaults to 4Kib + :return: """ - return "{}(pc=0x{:08X}, cycle={}, exit={}, instructions={})".format( - self.__class__.__name__, - self.pc, - self.cycle, - self.exit, - " ".join(s.name for s in self.instruction_sets) + stack_sec = BinaryDataMemorySection( + bytearray(stack_size), + '.stack', + None, # FIXME: why does a binary data memory section require a context? + '', + 0 ) + + if not self.mmu.load_section(stack_sec, fixed_position=False): + return False + + self.regs.set('sp', stack_sec.base + stack_sec.size) diff --git a/riscemu/MMU.py b/riscemu/MMU.py index f5d0375..ff59c5c 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -49,9 +49,11 @@ class MMU: """ Create a new MMU """ + self.programs = list() self.sections = list() self.global_symbols = dict() + def get_sec_containing(self, addr: T_AbsoluteAddress) -> Optional[MemorySection]: """ Returns the section that contains the address addr @@ -79,8 +81,8 @@ class MMU: """ sec = self.get_sec_containing(addr) if sec is None: - print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! " - "Have you forgotten an exit syscall or ret statement?" + FMT_NONE) + print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! (read at {}) ".format(addr) + + "Have you forgotten an exit syscall or ret statement?" + FMT_NONE) raise RuntimeError("No next instruction available!") return sec.read_ins(addr - sec.base) @@ -181,8 +183,7 @@ class MMU: at_addr = program.base else: - first_guaranteed_free_address = self.sections[-1].base + self.sections[-1].size - at_addr = align_addr(first_guaranteed_free_address, align_to) + at_addr = align_addr(self.get_guaranteed_free_address(), align_to) # trigger the load event to set all addresses in the binary program.loaded_trigger(at_addr) @@ -200,10 +201,35 @@ class MMU: # FIXME: this is pretty unclean and should probably be solved in a better way in the future program.context.global_symbol_dict = self.global_symbols + def load_section(self, sec: MemorySection, fixed_position: bool = False) -> bool: + if fixed_position: + if self.has_continous_free_region(sec.base, sec.base + sec.size): + self.sections.append(sec) + self._update_state() + else: + print(FMT_MEM + '[MMU] Cannot place section {} at {}, space is occupied!'.format(sec, sec.base)) + return False + else: + at_addr = align_addr(self.get_guaranteed_free_address(), 8) + sec.base = at_addr + self.sections.append(sec) + self._update_state() + return True + def _update_state(self): + """ + Called whenever a section or program is added to keep the list of programs and sections consistent + :return: + """ self.programs.sort(key=lambda bin: bin.base) self.sections.sort(key=lambda sec: sec.base) + def get_guaranteed_free_address(self) -> T_AbsoluteAddress: + if len(self.sections) == 0: + return 0x100 + else: + return self.sections[-1].base + self.sections[-1].size + def __repr__(self): return "MMU(\n\t{}\n)".format( "\n\t".join(repr(x) for x in self.programs) diff --git a/riscemu/__main__.py b/riscemu/__main__.py index 9ec6d4d..87db6bb 100644 --- a/riscemu/__main__.py +++ b/riscemu/__main__.py @@ -5,17 +5,18 @@ SPDX-License-Identifier: MIT This file holds the logic for starting the emulator from the CLI """ +from riscemu.CPU import UserModeCPU if __name__ == '__main__': - from . import * + from .config import RunConfig from .helpers import * from .instructions import InstructionSetDict + from riscemu.parser import AssemblyFileLoader import argparse import sys all_ins_names = list(InstructionSetDict.keys()) - class OptionStringAction(argparse.Action): def __init__(self, option_strings, dest, keys=None, omit_empty=False, **kwargs): if keys is None: @@ -93,17 +94,21 @@ if __name__ == '__main__': ] try: - cpu = CPU(cfg, ins_to_load) - loaded_exe = None + cpu = UserModeCPU(ins_to_load) + + opts = AssemblyFileLoader.get_options(sys.argv) for file in args.files: - tk = cpu.get_tokenizer(RiscVInput.from_file(file)) - tk.tokenize() - loaded_exe = cpu.load(ExecutableParser(tk).parse()) + loader = AssemblyFileLoader.instantiate(file, opts) + + cpu.load_program(loader.parse()) # run the last loaded executable - cpu.run_loaded(loaded_exe) + + cpu.setup_stack(cfg.stack_size) + + # launch the last loaded program + cpu.launch(cpu.mmu.programs[-1]) except RiscemuBaseException as e: - print("Error while parsing: {}".format(e.message())) - import traceback + print("Error: {}".format(e.message())) + e.print_stacktrace() - traceback.print_exception(type(e), e, e.__traceback__) sys.exit(1) diff --git a/riscemu/config.py b/riscemu/config.py index 7182958..e5f49a6 100644 --- a/riscemu/config.py +++ b/riscemu/config.py @@ -1,11 +1,10 @@ """ -RiscEmu (c) 2021 Anton Lydike +RiscEmu (c) 2021-2022 Anton Lydike SPDX-License-Identifier: MIT """ from dataclasses import dataclass -from typing import Optional @dataclass(frozen=True, init=True) diff --git a/riscemu/debug.py b/riscemu/debug.py index c89d686..a186097 100644 --- a/riscemu/debug.py +++ b/riscemu/debug.py @@ -4,35 +4,33 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -import typing -from .registers import Registers -from .colors import FMT_DEBUG, FMT_NONE -from .types import Instruction +from .base import SimpleInstruction from .helpers import * if typing.TYPE_CHECKING: - from . import * + from riscemu import CPU, Registers -def launch_debug_session(cpu: 'CPU', mmu: 'MMU', reg: 'Registers', prompt=""): - if not cpu.conf.debug_instruction or cpu.active_debug: +def launch_debug_session(cpu: 'CPU', prompt=""): + if cpu.debugger_active: return import code import readline import rlcompleter - cpu.active_debug = True + # set the active debug flag + cpu.debugger_active = True # setup some aliases: - registers = reg - regs = reg - memory = mmu - mem = mmu - syscall_interface = cpu.syscall_int + registers = cpu.regs + regs = cpu.regs + memory = cpu.mmu + mem = cpu.mmu + mmu = cpu.mmu # setup helper functions: def dump(what, *args, **kwargs): - if isinstance(what, Registers): + if what == regs: regs.dump(*args, **kwargs) else: mmu.dump(what, *args, **kwargs) @@ -50,20 +48,39 @@ def launch_debug_session(cpu: 'CPU', mmu: 'MMU', reg: 'Registers', prompt=""): return bin = mmu.get_bin_containing(cpu.pc) - ins = Instruction(name, list(args), bin) - print(FMT_DEBUG + "Running instruction " + ins + FMT_NONE) + ins = SimpleInstruction( + name, + tuple(args), + bin.context, + cpu.pc) + print(FMT_DEBUG + "Running instruction {}".format(ins) + FMT_NONE) cpu.run_instruction(ins) def cont(verbose=False): - cpu.continue_from_debugger(verbose) + try: + cpu.run(verbose) + except LaunchDebuggerException: + print(FMT_DEBUG + 'Returning to debugger...') + return def step(): - cpu.step() + try: + cpu.step() + except LaunchDebuggerException: + return + # collect all variables sess_vars = globals() sess_vars.update(locals()) + # add tab completion readline.set_completer(rlcompleter.Completer(sess_vars).complete) readline.parse_and_bind("tab: complete") - code.InteractiveConsole(sess_vars).interact(banner=FMT_DEBUG + prompt + FMT_NONE, exitmsg="Exiting debugger") - cpu.active_debug = False + + relaunch_debugger = False + + try: + code.InteractiveConsole(sess_vars).interact(banner=FMT_DEBUG + prompt + FMT_NONE, exitmsg="Exiting debugger") + finally: + cpu.debugger_active = False + diff --git a/riscemu/exceptions.py b/riscemu/exceptions.py index e9291ee..3e95dc7 100644 --- a/riscemu/exceptions.py +++ b/riscemu/exceptions.py @@ -17,6 +17,9 @@ class RiscemuBaseException(BaseException): def message(self): pass + def print_stacktrace(self): + import traceback + traceback.print_exception(type(self), self, self.__traceback__) # Parsing exceptions: @@ -115,13 +118,15 @@ class InvalidAllocationException(RiscemuBaseException): class UnimplementedInstruction(RiscemuBaseException): - def __init__(self, ins: 'Instruction'): + def __init__(self, ins: 'Instruction', context = None): self.ins = ins + self.context = context def message(self): - return FMT_CPU + "{}({})".format( + return FMT_CPU + "{}({}{})".format( self.__class__.__name__, - repr(self.ins) + repr(self.ins), + ', context={}'.format(self.context) if self.context is not None else '' ) + FMT_NONE diff --git a/riscemu/instructions/RV32A.py b/riscemu/instructions/RV32A.py index ba6a8a6..44c3f32 100644 --- a/riscemu/instructions/RV32A.py +++ b/riscemu/instructions/RV32A.py @@ -1,4 +1,4 @@ -from .InstructionSet import InstructionSet, Instruction +from .instruction_set import InstructionSet, Instruction from ..exceptions import INS_NOT_IMPLEMENTED from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed diff --git a/riscemu/instructions/RV32I.py b/riscemu/instructions/RV32I.py index e3db8a3..291ccbe 100644 --- a/riscemu/instructions/RV32I.py +++ b/riscemu/instructions/RV32I.py @@ -4,7 +4,8 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -from .InstructionSet import * +from .instruction_set import * +from ..CPU import UserModeCPU from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed from ..colors import FMT_DEBUG, FMT_NONE @@ -116,14 +117,8 @@ class RV32I(InstructionSet): ) def instruction_add(self, ins: 'Instruction'): - dst = "" - if self.cpu.conf.add_accept_imm: - try: - dst, rs1, rs2 = self.parse_rd_rs_imm(ins) - except: - pass - if not dst: - dst, rs1, rs2 = self.parse_rd_rs_rs(ins) + # FIXME: once configuration is figured out, add flag to support immediate arg in add instruction + dst, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( dst, @@ -292,20 +287,19 @@ class RV32I(InstructionSet): def instruction_scall(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 0) + + if not isinstance(self.cpu, UserModeCPU): + # FIXME: add exception for syscall not supported or something + raise + syscall = Syscall(self.regs.get('a7'), self.cpu) self.cpu.syscall_int.handle_syscall(syscall) def instruction_sbreak(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 0) - if self.cpu.active_debug: - print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE) - raise LaunchDebuggerException() - launch_debug_session( - self.cpu, - self.mmu, - self.regs, - "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) - ) + + print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE) + raise LaunchDebuggerException() def instruction_nop(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 0) diff --git a/riscemu/instructions/RV32M.py b/riscemu/instructions/RV32M.py index 31b9341..d8ae08b 100644 --- a/riscemu/instructions/RV32M.py +++ b/riscemu/instructions/RV32M.py @@ -4,7 +4,7 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -from .InstructionSet import * +from .instruction_set import * from ..exceptions import INS_NOT_IMPLEMENTED diff --git a/riscemu/instructions/__init__.py b/riscemu/instructions/__init__.py index 65bda29..96fb524 100644 --- a/riscemu/instructions/__init__.py +++ b/riscemu/instructions/__init__.py @@ -6,7 +6,7 @@ SPDX-License-Identifier: MIT This package holds all instruction sets, available to the processor """ -from .InstructionSet import InstructionSet +from .instruction_set import InstructionSet from .RV32M import RV32M from .RV32I import RV32I from .RV32A import RV32A diff --git a/riscemu/instructions/InstructionSet.py b/riscemu/instructions/instruction_set.py similarity index 100% rename from riscemu/instructions/InstructionSet.py rename to riscemu/instructions/instruction_set.py diff --git a/riscemu/parser.py b/riscemu/parser.py index f1e94f6..711abba 100644 --- a/riscemu/parser.py +++ b/riscemu/parser.py @@ -53,6 +53,7 @@ def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program: for token, args in composite_tokenizer(Peekable[Token](tokens_iter)): if token.type not in PARSERS: raise ParseException("Unexpected token type: {}, {}".format(token, args)) + print('{}: {}'.format(token, args)) PARSERS[token.type](token, args, context) return context.finalize() @@ -73,6 +74,8 @@ def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, T token = next(tokens) if token.type in (TokenType.PSEUDO_OP, TokenType.LABEL, TokenType.INSTRUCTION_NAME): yield token, tuple(take_arguments(tokens)) + else: + print("skipped {}".format(token)) def take_arguments(tokens: Peekable[Token]) -> Iterable[str]: @@ -85,12 +88,14 @@ def take_arguments(tokens: Peekable[Token]) -> Iterable[str]: while True: if tokens.peek().type == TokenType.ARGUMENT: yield next(tokens).value - if tokens.peek().type == TokenType.COMMA: - next(tokens) elif tokens.peek().type == TokenType.NEWLINE: next(tokens) break - break + elif tokens.peek().type == TokenType.COMMA: + next(tokens) + else: + break + # raise ParseException("Expected newline, instead got {}".format(tokens.peek())) diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index 9bb5fec..6fa83eb 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -16,7 +16,7 @@ from ..instructions import RV32A, RV32M if typing.TYPE_CHECKING: from riscemu import types, LoadedExecutable, LoadedInstruction - from riscemu.instructions.InstructionSet import InstructionSet + from riscemu.instructions.instruction_set import InstructionSet class PrivCPU(CPU): @@ -83,7 +83,7 @@ class PrivCPU(CPU): self.launch_debug = False launch_debug_session(self, self.mmu, self.regs, "Launching debugger:") - if not self.active_debug: + if not self.debugger_active: self._run(verbose) else: print() diff --git a/riscemu/registers.py b/riscemu/registers.py index a3de09a..aa45915 100644 --- a/riscemu/registers.py +++ b/riscemu/registers.py @@ -1,28 +1,23 @@ """ -RiscEmu (c) 2021 Anton Lydike +RiscEmu (c) 2021-2022 Anton Lydike SPDX-License-Identifier: MIT """ -from .config import RunConfig -from .helpers import * from collections import defaultdict -from .exceptions import InvalidRegisterException + +from .helpers import * + class Registers: """ Represents a bunch of registers """ - def __init__(self, conf: RunConfig): - """ - Initialize the register configuration, respecting the RunConfig conf - :param conf: The RunConfig - """ + def __init__(self): self.vals = defaultdict(lambda: 0) self.last_set = None self.last_read = None - self.conf = conf def dump(self, full=False): """ @@ -96,7 +91,7 @@ class Registers: """ if reg == 'zero': return False - #if reg not in Registers.all_registers(): + # if reg not in Registers.all_registers(): # raise InvalidRegisterException(reg) # replace fp register with s1, as these are the same register if reg == 'fp': @@ -114,7 +109,7 @@ class Registers: :param mark_read: If the register should be markes as "last read" (only used internally) :return: The contents of register reg """ - #if reg not in Registers.all_registers(): + # if reg not in Registers.all_registers(): # raise InvalidRegisterException(reg) if reg == 'fp': reg = 's0' diff --git a/riscemu/syscall.py b/riscemu/syscall.py index 6f22177..4bfcbbe 100644 --- a/riscemu/syscall.py +++ b/riscemu/syscall.py @@ -15,7 +15,7 @@ import riscemu import typing if typing.TYPE_CHECKING: - from . import CPU + from riscemu.CPU import UserModeCPU SYSCALLS = { 63: 'read', @@ -43,7 +43,7 @@ class Syscall: """ id: int """The syscall number (e.g. 64 - write)""" - cpu: 'riscemu.CPU' + cpu: 'UserModeCPU' """The CPU object that created the syscall""" @property @@ -146,7 +146,8 @@ class SyscallInterface: Requires running with flag scall-fs """ - if not scall.cpu.conf.scall_fs: + # FIXME: this should be toggleable in a global setting or somethign + if True: print(FMT_SYSCALL + '[Syscall] open: opening files not supported without scall-fs flag!' + FMT_NONE) return scall.ret(-1) @@ -193,7 +194,7 @@ class SyscallInterface: """ Exit syscall. Exits the system with status code a0 """ - scall.cpu.exit = True + scall.cpu.halted = True scall.cpu.exit_code = scall.cpu.regs.get('a0') def __repr__(self): diff --git a/riscemu/tokenizer.py b/riscemu/tokenizer.py index 35dcfe9..e855b9d 100644 --- a/riscemu/tokenizer.py +++ b/riscemu/tokenizer.py @@ -14,7 +14,7 @@ from .exceptions import ParseException LINE_COMMENT_STARTERS = ('#', ';', '//') WHITESPACE_PATTERN = re.compile(r'\s+') -MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$') +MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+|[A-z0-9_-]+)\(([A-z]+[0-9]{0,2})\)$') REGISTER_NAMES = RISCV_REGS diff --git a/riscemu/types.py b/riscemu/types.py index c175e09..b265cb9 100644 --- a/riscemu/types.py +++ b/riscemu/types.py @@ -9,16 +9,20 @@ See base.py for some basic implementations of these classes """ import os import re +import typing from abc import ABC, abstractmethod from collections import defaultdict from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple, Set, Union, Generator, Iterator, Callable, Type +from typing import Dict, List, Optional, Tuple, Set, Union, Iterator, Callable, Type -from . import MMU, InstructionSet -from .assembler import get_section_base_name -from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_PARSE, FMT_RED, FMT_BOLD +from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_RED, FMT_BOLD from .exceptions import ParseException -from .helpers import format_bytes +from .helpers import format_bytes, get_section_base_name +from .registers import Registers + +if typing.TYPE_CHECKING: + from .MMU import MMU + from .instructions.instruction_set import InstructionSet # define some base type aliases so we can keep track of absolute and relative addresses T_RelativeAddress = int @@ -231,7 +235,7 @@ class Program: self.sections = [] self.global_labels = set() self.base = base - self.loaded = False + self.is_loaded = False def add_section(self, sec: MemorySection): # print a warning when a section is located before the programs base @@ -286,7 +290,7 @@ class Program: for sec in self.sections: sec.base += at_addr - if self.base != at_addr: + if self.base is not None and self.base != at_addr: # move sections so they are located where they want to be located offset = at_addr - self.base for sec in self.sections: @@ -354,7 +358,8 @@ class CPU(ABC): INS_XLEN: int = 4 # housekeeping variables - mmu: MMU + regs: Registers + mmu: 'MMU' pc: T_AbsoluteAddress cycle: int halted: bool @@ -364,10 +369,11 @@ class CPU(ABC): # instruction information instructions: Dict[str, Callable[[Instruction], None]] - instruction_sets: Set[InstructionSet] + instruction_sets: Set['InstructionSet'] - def __init__(self, mmu: MMU, instruction_sets: List[Type[InstructionSet]]): + def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']]): self.mmu = mmu + self.regs = Registers() self.instruction_sets = set() self.instructions = dict() @@ -377,6 +383,7 @@ class CPU(ABC): self.instructions.update(ins_set.load()) self.instruction_sets.add(ins_set) + self.halted = False self.cycle = 0 self.pc = 0 self.debugger_active = False @@ -410,3 +417,19 @@ class CPU(ABC): self.halted, " ".join(s.name for s in self.instruction_sets) ) + + @abstractmethod + def step(self, verbose=False): + pass + + @abstractmethod + def run(self, verbose=False): + pass + + def launch(self, program: Program, verbose: bool = False): + if program not in self.mmu.programs: + print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE) + return + + self.pc = program.entrypoint + self.run(verbose) From 7904a4dae88777cf8886367b1115fcfa57ccb36b Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Fri, 11 Feb 2022 18:31:23 +0100 Subject: [PATCH 07/30] added verbosity control to user mode emulator --- riscemu/__main__.py | 9 +++++++-- riscemu/syscall.py | 6 +----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/riscemu/__main__.py b/riscemu/__main__.py index 87db6bb..fbfe96b 100644 --- a/riscemu/__main__.py +++ b/riscemu/__main__.py @@ -17,6 +17,7 @@ if __name__ == '__main__': all_ins_names = list(InstructionSetDict.keys()) + class OptionStringAction(argparse.Action): def __init__(self, option_strings, dest, keys=None, omit_empty=False, **kwargs): if keys is None: @@ -65,6 +66,9 @@ if __name__ == '__main__': parser.add_argument('--stack_size', type=int, help='Stack size of loaded programs, defaults to 8MB', nargs='?') + parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count', + default=0) + args = parser.parse_args() # create a RunConfig from the cli args @@ -75,7 +79,8 @@ if __name__ == '__main__': debug_on_exception=not args.options['fail_on_ex'], add_accept_imm=args.options['add_accept_imm'], scall_fs=args.syscall_opts['fs_access'], - scall_input=not args.syscall_opts['disable_input'] + scall_input=not args.syscall_opts['disable_input'], + verbosity=args.verbose ) for k, v in dict(cfg_dict).items(): if v is None: @@ -106,7 +111,7 @@ if __name__ == '__main__': cpu.setup_stack(cfg.stack_size) # launch the last loaded program - cpu.launch(cpu.mmu.programs[-1]) + cpu.launch(cpu.mmu.programs[-1], verbose=cfg.verbosity > 1) except RiscemuBaseException as e: print("Error: {}".format(e.message())) e.print_stacktrace() diff --git a/riscemu/syscall.py b/riscemu/syscall.py index 4bfcbbe..e46c49a 100644 --- a/riscemu/syscall.py +++ b/riscemu/syscall.py @@ -4,16 +4,12 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ +import sys from dataclasses import dataclass from typing import Dict, IO -import sys from .helpers import * -import riscemu - -import typing - if typing.TYPE_CHECKING: from riscemu.CPU import UserModeCPU From 2880a59dbb84b3859061db41154d04c8cdc6d7f1 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Fri, 11 Feb 2022 18:53:26 +0100 Subject: [PATCH 08/30] fixed ascii escape sequences and section address calculation --- riscemu/assembler.py | 3 +++ riscemu/types.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/riscemu/assembler.py b/riscemu/assembler.py index ec63833..de29044 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -173,6 +173,9 @@ class AssemblerDirectives: @classmethod def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True): + # replace '\t' and '\n' escape sequences + text = text.replace('\\n', '\n').replace('\\t', '\t') + encoded_bytes = bytearray(text.encode('ascii')) if zero_terminate: encoded_bytes += bytearray(1) diff --git a/riscemu/types.py b/riscemu/types.py index b265cb9..e2895f0 100644 --- a/riscemu/types.py +++ b/riscemu/types.py @@ -65,7 +65,7 @@ class InstructionContext: global_symbol_dict: Dict[str, T_AbsoluteAddress] """ - A reference to the MMU for access to global symbols + A reference to the MMU's global symbol dictionary for access to global symbols """ def __init__(self): @@ -264,7 +264,7 @@ class Program: return base + self.context.labels.get('main') for sec in self.sections: if get_section_base_name(sec.name) == '.text' and sec.flags.executable: - return base + sec.base + return sec.base def loaded_trigger(self, at_addr: T_AbsoluteAddress): """ From 185ae8b94ebeebbe697701c4360bdfb042ff2e49 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Fri, 11 Feb 2022 20:25:19 +0100 Subject: [PATCH 09/30] added config and better loading code to CPU base --- riscemu/CPU.py | 11 +++++-- riscemu/__main__.py | 5 ++- riscemu/priv/PrivCPU.py | 71 ++++++++++++++++++---------------------- riscemu/priv/__main__.py | 7 ++-- riscemu/types.py | 15 ++++++++- 5 files changed, 59 insertions(+), 50 deletions(-) diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 2ac4548..6340495 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -10,13 +10,14 @@ import typing from typing import List, Type import riscemu +from . import AssemblyFileLoader, RunConfig from .MMU import MMU from .base import BinaryDataMemorySection from .colors import FMT_CPU, FMT_NONE from .debug import launch_debug_session from .exceptions import RiscemuBaseException, LaunchDebuggerException from .syscall import SyscallInterface, get_syscall_symbols -from .types import CPU +from .types import CPU, ProgramLoader if typing.TYPE_CHECKING: from .instructions.instruction_set import InstructionSet @@ -29,14 +30,14 @@ class UserModeCPU(CPU): It is initialized with a configuration and a list of instruction sets. """ - def __init__(self, instruction_sets: List[Type['riscemu.InstructionSet']]): + def __init__(self, instruction_sets: List[Type['riscemu.InstructionSet']], conf: RunConfig): """ Creates a CPU instance. :param instruction_sets: A list of instruction set classes. These must inherit from the InstructionSet class """ # setup CPU states - super().__init__(MMU(), instruction_sets) + super().__init__(MMU(), instruction_sets, conf) self.exit_code = 0 @@ -104,3 +105,7 @@ class UserModeCPU(CPU): return False self.regs.set('sp', stack_sec.base + stack_sec.size) + + @classmethod + def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]: + return [AssemblyFileLoader] diff --git a/riscemu/__main__.py b/riscemu/__main__.py index fbfe96b..1a88200 100644 --- a/riscemu/__main__.py +++ b/riscemu/__main__.py @@ -99,15 +99,14 @@ if __name__ == '__main__': ] try: - cpu = UserModeCPU(ins_to_load) + cpu = UserModeCPU(ins_to_load, cfg) opts = AssemblyFileLoader.get_options(sys.argv) for file in args.files: loader = AssemblyFileLoader.instantiate(file, opts) - cpu.load_program(loader.parse()) - # run the last loaded executable + # set up a stack cpu.setup_stack(cfg.stack_size) # launch the last loaded program diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index 6fa83eb..a6d9c5a 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -3,19 +3,20 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ +import sys import time from riscemu.CPU import * from .CSR import CSR +from .ElfLoader import ElfBinaryFileLoader from .Exceptions import * -from .PrivMMU import PrivMMU +from .ImageLoader import MemoryImageLoader from .PrivRV32I import PrivRV32I from .privmodes import PrivModes -from ..IO import TextIO from ..instructions import RV32A, RV32M +from ..types import Program if typing.TYPE_CHECKING: - from riscemu import types, LoadedExecutable, LoadedInstruction from riscemu.instructions.instruction_set import InstructionSet @@ -38,21 +39,20 @@ class PrivCPU(CPU): controls the resolution of the time csr register (in nanoseconds) """ - INS_XLEN = 4 + pending_traps: List[CpuTrap] """ - Size of an instruction in memory. Should be 4, but since our loading code is shit, instruction take up - the equivalent of "1 byte" (this is actually impossible) + A list of traps which are pending to be handled """ def __init__(self, conf): - super().__init__(conf, [PrivRV32I, RV32M, RV32A]) + super().__init__(MMU(), [PrivRV32I, RV32M, RV32A], conf) # start in machine mode self.mode: PrivModes = PrivModes.MACHINE - self.syscall_int = None - self.launch_debug = False self.pending_traps: List[CpuTrap] = list() + self.exit_code = 0 + self._time_start = 0 self._time_timecmp = 0 self._time_interrupt_enabled = False @@ -63,45 +63,37 @@ class PrivCPU(CPU): # init csr self._init_csr() - def _run(self, verbose=False): + def run(self, verbose=False): if self.pc <= 0: return False - ins = None + + launch_debug = False + try: - while not self.exit: + while not self.halted: self.step(verbose) except RiscemuBaseException as ex: if isinstance(ex, LaunchDebuggerException): - self.launch_debug = True + launch_debug = True self.pc += self.INS_XLEN - if self.exit: + if self.halted: print() - print(FMT_CPU + "Program exited with code {}".format(self.exit_code) + FMT_NONE) + print(FMT_CPU + "[CPU] System halted with code {}".format(self.exit_code) + FMT_NONE) sys.exit(self.exit_code) - elif self.launch_debug: - self.launch_debug = False - launch_debug_session(self, self.mmu, self.regs, - "Launching debugger:") + + elif launch_debug: + launch_debug_session(self) if not self.debugger_active: - self._run(verbose) + self.run(verbose) else: print() - print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE) + print(FMT_CPU + "[CPU] System stopped without halting - perhaps you stopped the debugger?" + FMT_NONE) - def load(self, e: riscemu.base_types): - raise NotImplementedError("Not supported!") - - def run_loaded(self, le: 'riscemu.LoadedExecutable'): - raise NotImplementedError("Not supported!") - - def get_tokenizer(self, tokenizer_input): - raise NotImplementedError("Not supported!") - - def run(self, verbose: bool = False): + def launch(self, program: Program, verbose: bool = False): print(FMT_CPU + '[CPU] Started running from 0x{:08X} ({})'.format(self.pc, "kernel") + FMT_NONE) self._time_start = time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._run(self.conf.verbosity > 1) + self.run(self.conf.verbosity > 1 or verbose) def _init_csr(self): # set up CSR @@ -184,7 +176,7 @@ class PrivCPU(CPU): if not (len(self.pending_traps) > 0 and self.csr.get_mstatus('mie')): return # select best interrupt - # TODO: actually select based on the official ranking + # FIXME: actually select based on the official ranking trap = self.pending_traps.pop() # use the most recent trap if self.conf.verbosity > 0: print(FMT_CPU + "[CPU] taking trap {}!".format(trap) + FMT_NONE) @@ -209,7 +201,7 @@ class PrivCPU(CPU): if mtvec & 0b11 == 1: self.pc = (mtvec & 0b11111111111111111111111111111100) + (trap.code * 4) self.record_perf_profile() - if len(self._perf_counters) % 100 == 0: + if len(self._perf_counters) > 100: self.show_perf() def show_perf(self): @@ -225,11 +217,6 @@ class PrivCPU(CPU): continue cps = (cycle - cycled) / (time_ns - timed) * 1000000000 - # print(" {:03d} cycles in {:08d}ns ({:.2f} cycles/s)".format( - # cycle - cycled, - # time_ns - timed, - # cps - # )) cycled = cycle timed = time_ns cps_list.append(cps) @@ -238,3 +225,9 @@ class PrivCPU(CPU): def record_perf_profile(self): self._perf_counters.append((time.perf_counter_ns(), self.cycle)) + + @classmethod + def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]: + return [ + AssemblyFileLoader, MemoryImageLoader, ElfBinaryFileLoader + ] \ No newline at end of file diff --git a/riscemu/priv/__main__.py b/riscemu/priv/__main__.py index 36c0d13..2363de4 100644 --- a/riscemu/priv/__main__.py +++ b/riscemu/priv/__main__.py @@ -1,7 +1,6 @@ -from .PrivCPU import PrivCPU, RunConfig -from .ImageLoader import MemoryImageMMU -from .PrivMMU import LoadedElfMMU -from .ElfLoader import ElfExecutable +from .PrivCPU import PrivCPU +from .ElfLoader import ElfBinaryFileLoader +from .ImageLoader import MemoryImageLoader import sys diff --git a/riscemu/types.py b/riscemu/types.py index e2895f0..e2f7065 100644 --- a/riscemu/types.py +++ b/riscemu/types.py @@ -15,6 +15,7 @@ from collections import defaultdict from dataclasses import dataclass from typing import Dict, List, Optional, Tuple, Set, Union, Iterator, Callable, Type +from . import RunConfig from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_RED, FMT_BOLD from .exceptions import ParseException from .helpers import format_bytes, get_section_base_name @@ -371,9 +372,13 @@ class CPU(ABC): instructions: Dict[str, Callable[[Instruction], None]] instruction_sets: Set['InstructionSet'] - def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']]): + # configuration + conf: RunConfig + + def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig): self.mmu = mmu self.regs = Registers() + self.conf = conf self.instruction_sets = set() self.instructions = dict() @@ -433,3 +438,11 @@ class CPU(ABC): self.pc = program.entrypoint self.run(verbose) + + @classmethod + @abstractmethod + def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]: + pass + + def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]: + return max(self.get_loaders(), key=lambda ld: ld.can_parse(file_name)) From 3d2619c258e716a9a3b4c4f671a49f03eb29c161 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Fri, 11 Feb 2022 20:29:11 +0100 Subject: [PATCH 10/30] created a better output for reads/writes outside of known regions --- riscemu/MMU.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/riscemu/MMU.py b/riscemu/MMU.py index ff59c5c..0854747 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -7,7 +7,7 @@ SPDX-License-Identifier: MIT from typing import Dict, List, Optional from .colors import * -from .exceptions import InvalidAllocationException +from .exceptions import InvalidAllocationException, MemoryAccessException from .helpers import align_addr, int_from_bytes from .types import Instruction, MemorySection, MemoryFlags, T_AbsoluteAddress, \ Program @@ -97,7 +97,7 @@ class MMU: sec = self.get_sec_containing(addr) if sec is None: print(FMT_MEM + "[MMU] Trying to read data form invalid region at 0x{:x}! ".format(addr) + FMT_NONE) - raise RuntimeError("Reading from uninitialized memory region!") + raise MemoryAccessException("region is non-initialized!", addr, size, 'read') return sec.read(addr - sec.base, size) def write(self, addr: int, size: int, data): @@ -111,7 +111,7 @@ class MMU: sec = self.get_sec_containing(addr) if sec is None: print(FMT_MEM + '[MMU] Invalid write into non-initialized region at 0x{:08X}'.format(addr) + FMT_NONE) - raise RuntimeError("No write pls") + raise MemoryAccessException("region is non-initialized!", addr, size, 'write') return sec.write(addr - sec.base, size, data) From 6fa3558f6c39dcd76fe7741af028ff04dba6181f Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 13 Feb 2022 14:55:03 +0100 Subject: [PATCH 11/30] added interactive mode, fixed some bugs --- riscemu/CPU.py | 4 +++- riscemu/MMU.py | 13 ++++++++--- riscemu/__init__.py | 5 ++--- riscemu/__main__.py | 7 +++--- riscemu/assembler.py | 8 +++++++ riscemu/base.py | 5 +++-- riscemu/debug.py | 14 +++++++++--- riscemu/helpers.py | 1 + riscemu/interactive.py | 25 +++++++++++++++++++++ riscemu/priv/PrivMMU.py | 49 +++++++++++++++++++++-------------------- riscemu/types.py | 45 ++++++++++++++++++++----------------- 11 files changed, 117 insertions(+), 59 deletions(-) create mode 100644 riscemu/interactive.py diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 6340495..8e3c395 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -10,7 +10,7 @@ import typing from typing import List, Type import riscemu -from . import AssemblyFileLoader, RunConfig +from .config import RunConfig from .MMU import MMU from .base import BinaryDataMemorySection from .colors import FMT_CPU, FMT_NONE @@ -18,6 +18,7 @@ from .debug import launch_debug_session from .exceptions import RiscemuBaseException, LaunchDebuggerException from .syscall import SyscallInterface, get_syscall_symbols from .types import CPU, ProgramLoader +from .parser import AssemblyFileLoader if typing.TYPE_CHECKING: from .instructions.instruction_set import InstructionSet @@ -105,6 +106,7 @@ class UserModeCPU(CPU): return False self.regs.set('sp', stack_sec.base + stack_sec.size) + return True @classmethod def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]: diff --git a/riscemu/MMU.py b/riscemu/MMU.py index 0854747..a91ca9b 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -10,7 +10,7 @@ from .colors import * from .exceptions import InvalidAllocationException, MemoryAccessException from .helpers import align_addr, int_from_bytes from .types import Instruction, MemorySection, MemoryFlags, T_AbsoluteAddress, \ - Program + Program, InstructionContext class MMU: @@ -53,7 +53,6 @@ class MMU: self.sections = list() self.global_symbols = dict() - def get_sec_containing(self, addr: T_AbsoluteAddress) -> Optional[MemorySection]: """ Returns the section that contains the address addr @@ -82,7 +81,7 @@ class MMU: sec = self.get_sec_containing(addr) if sec is None: print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! (read at {}) ".format(addr) - + "Have you forgotten an exit syscall or ret statement?" + FMT_NONE) + + "Have you forgotten an exit syscall or ret statement?" + FMT_NONE) raise RuntimeError("No next instruction available!") return sec.read_ins(addr - sec.base) @@ -234,3 +233,11 @@ class MMU: return "MMU(\n\t{}\n)".format( "\n\t".join(repr(x) for x in self.programs) ) + + def context_for(self, addr: T_AbsoluteAddress) -> Optional[InstructionContext]: + sec = self.get_sec_containing(addr) + + if sec is not None: + return sec.context + + return None diff --git a/riscemu/__init__.py b/riscemu/__init__.py index 90ba867..22d1f8b 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -11,14 +11,13 @@ It contains everything needed to run assembly files, so you don't need any custo from .exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException -#from .base_types import Executable, LoadedExecutable, LoadedMemorySection - from .instructions import * from .MMU import MMU from .registers import Registers from .syscall import SyscallInterface, Syscall -from .CPU import CPU +from .CPU import CPU, UserModeCPU +from .debug import launch_debug_session from .config import RunConfig diff --git a/riscemu/__main__.py b/riscemu/__main__.py index 1a88200..21d676e 100644 --- a/riscemu/__main__.py +++ b/riscemu/__main__.py @@ -9,9 +9,9 @@ from riscemu.CPU import UserModeCPU if __name__ == '__main__': from .config import RunConfig - from .helpers import * from .instructions import InstructionSetDict - from riscemu.parser import AssemblyFileLoader + from .colors import FMT_BOLD, FMT_MAGENTA + from .parser import AssemblyFileLoader import argparse import sys @@ -69,7 +69,8 @@ if __name__ == '__main__': parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count', default=0) - args = parser.parse_args() + parser.add_argument('--interactive', help="Launch the interactive debugger instantly instead of loading any " + "programs", action='store_true') # create a RunConfig from the cli args cfg_dict = dict( diff --git a/riscemu/assembler.py b/riscemu/assembler.py index de29044..869b9e4 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -89,6 +89,14 @@ class ParseContext: self._finalize_section() self.section = CurrentSection(name, type, base) + def add_label(self, name: str, value: int, is_global: bool = False, is_relative: bool = False): + self.context.labels[name] = value + if is_global: + self.program.global_labels.add(name) + if is_relative: + self.program.relative_labels.add(name) + + def __repr__(self): return "{}(\n\tsetion={},\n\tprogram={}\n)".format( self.__class__.__name__, self.section, self.program diff --git a/riscemu/base.py b/riscemu/base.py index 3989266..474e4ed 100644 --- a/riscemu/base.py +++ b/riscemu/base.py @@ -5,7 +5,7 @@ This aims to be a simple base, usable for everyone who needs the basic functiona want to set up their own subtypes of Instruction and MemorySection """ -from typing import List, Tuple +from typing import List, Tuple, Union from .exceptions import MemoryAccessException from .helpers import parse_numeric_argument from .types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \ @@ -13,7 +13,8 @@ from .types import Instruction, MemorySection, MemoryFlags, InstructionContext, class SimpleInstruction(Instruction): - def __init__(self, name: str, args: Tuple[str], context: InstructionContext, addr: T_RelativeAddress): + def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]], + context: InstructionContext, addr: T_RelativeAddress): self.context = context self.name = name self.args = args diff --git a/riscemu/debug.py b/riscemu/debug.py index a186097..0885843 100644 --- a/riscemu/debug.py +++ b/riscemu/debug.py @@ -3,6 +3,7 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ +import os.path from .base import SimpleInstruction from .helpers import * @@ -11,6 +12,7 @@ if typing.TYPE_CHECKING: from riscemu import CPU, Registers + def launch_debug_session(cpu: 'CPU', prompt=""): if cpu.debugger_active: return @@ -46,12 +48,12 @@ def launch_debug_session(cpu: 'CPU', prompt=""): if len(args) > 3: print("Invalid arg count!") return - bin = mmu.get_bin_containing(cpu.pc) + context = mmu.context_for(cpu.pc) ins = SimpleInstruction( name, tuple(args), - bin.context, + context, cpu.pc) print(FMT_DEBUG + "Running instruction {}".format(ins) + FMT_NONE) cpu.run_instruction(ins) @@ -76,11 +78,17 @@ def launch_debug_session(cpu: 'CPU', prompt=""): # add tab completion readline.set_completer(rlcompleter.Completer(sess_vars).complete) readline.parse_and_bind("tab: complete") + if os.path.exists('~/.riscemu_history'): + readline.read_history_file('~/.riscemu_history') relaunch_debugger = False try: - code.InteractiveConsole(sess_vars).interact(banner=FMT_DEBUG + prompt + FMT_NONE, exitmsg="Exiting debugger") + code.InteractiveConsole(sess_vars).interact( + banner=FMT_DEBUG + prompt + FMT_NONE, + exitmsg="Exiting debugger", + ) finally: cpu.debugger_active = False + readline.write_history_file('~/.riscemu_history') diff --git a/riscemu/helpers.py b/riscemu/helpers.py index bbec01f..9c94635 100644 --- a/riscemu/helpers.py +++ b/riscemu/helpers.py @@ -61,6 +61,7 @@ def to_signed(num: int, bytes=4) -> int: return num + def create_chunks(my_list, chunk_size): """Split a list like [a,b,c,d,e,f,g,h,i,j,k,l,m] into e.g. [[a,b,c,d],[e,f,g,h],[i,j,k,l],[m]]""" return [my_list[i:i + chunk_size] for i in range(0, len(my_list), chunk_size)] diff --git a/riscemu/interactive.py b/riscemu/interactive.py new file mode 100644 index 0000000..71526f3 --- /dev/null +++ b/riscemu/interactive.py @@ -0,0 +1,25 @@ +from riscemu import RunConfig +from riscemu.base import InstructionMemorySection, SimpleInstruction +from riscemu.types import InstructionContext, Program + +if __name__ == '__main__': + from .CPU import UserModeCPU + from .instructions import InstructionSetDict + from .debug import launch_debug_session + + cpu = UserModeCPU(list(InstructionSetDict.values()), RunConfig(verbosity=4)) + + program = Program('interactive session', base=0x100) + context = program.context + program.add_section(InstructionMemorySection([ + SimpleInstruction('ebreak', (), context, 0x100), + SimpleInstruction('addi', ('a0', 'zero', '0'), context, 0x104), + SimpleInstruction('addi', ('a7', 'zero', '93'), context, 0x108), + SimpleInstruction('scall', (), context, 0x10C), + ], '.text', context, program.name, 0x100)) + + cpu.load_program(program) + + cpu.setup_stack() + + cpu.launch(program) diff --git a/riscemu/priv/PrivMMU.py b/riscemu/priv/PrivMMU.py index 798caa9..f6a86d6 100644 --- a/riscemu/priv/PrivMMU.py +++ b/riscemu/priv/PrivMMU.py @@ -1,42 +1,43 @@ +from .types import ElfMemorySection from ..MMU import * from abc import abstractmethod import typing -from .ElfLoader import ElfExecutable - if typing.TYPE_CHECKING: from .PrivCPU import PrivCPU class PrivMMU(MMU): - cpu: 'PrivCPU' - - @abstractmethod - def get_entrypoint(self) -> int: - raise - def set_cpu(self, cpu: 'PrivCPU'): - self.cpu = cpu + def get_sec_containing(self, addr: T_AbsoluteAddress) -> MemorySection: + # try to get an existing section + existing_sec = super().get_sec_containing(addr) - def translate_address(self, addr: int): - return "" + if existing_sec is not None: + return existing_sec + # get section preceding empty space at addr + sec_before = next((sec for sec in reversed(self.sections) if sec.end < addr), None) + # get sec succeeding empty space at addr + sec_after = next((sec for sec in self.sections if sec.base > addr), None) -class LoadedElfMMU(PrivMMU): - def __init__(self, elf: ElfExecutable): - super().__init__(conf=RunConfig()) - self.entrypoint = elf.symbols['_start'] + # calc start end end of "free" space + prev_sec_end = 0 if sec_before is None else sec_before.end + next_sec_start = 0x7FFFFFFF if sec_after is None else sec_before.base - self.binaries.append(elf) - for sec in elf.sections: - self.sections.append(sec) + # start at the end of the prev section, or current address - 0xFFFF (aligned to 16 byte boundary) + start = max(prev_sec_end, align_addr(addr - 0xFFFF, 16)) + # end at the start of the next section, or address + 0xFFFF (aligned to 16 byte boundary) + end = min(next_sec_start, align_addr(addr + 0xFFFF, 16)) - def load_bin(self, exe: Executable) -> LoadedExecutable: - raise NotImplementedError("This is a privMMU, it's initialized with a single ElfExecutable!") + sec = ElfMemorySection(bytearray(end - start), '.empty', self.global_instruction_context(), '', start, MemoryFlags(False, True)) + self.sections.append(sec) + self._update_state() - def allocate_section(self, name: str, req_size: int, flag: MemoryFlags): - raise NotImplementedError("Not supported!") + return sec - def get_entrypoint(self): - return self.entrypoint + def global_instruction_context(self) -> InstructionContext: + context = InstructionContext() + context.global_symbol_dict = self.global_symbols + return context \ No newline at end of file diff --git a/riscemu/types.py b/riscemu/types.py index e2f7065..f5c9989 100644 --- a/riscemu/types.py +++ b/riscemu/types.py @@ -15,7 +15,7 @@ from collections import defaultdict from dataclasses import dataclass from typing import Dict, List, Optional, Tuple, Set, Union, Iterator, Callable, Type -from . import RunConfig +from .config import RunConfig from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_RED, FMT_BOLD from .exceptions import ParseException from .helpers import format_bytes, get_section_base_name @@ -81,19 +81,17 @@ class InstructionContext: raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol)) direction = symbol[-1] + values = self.numbered_labels.get(symbol[:-1], []) if direction == 'b': - return max([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr < address_at], - default=None) + return max((addr + self.base_address for addr in values if addr < address_at), default=None) else: - return min([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr > address_at], - default=None) + return min((addr + self.base_address for addr in values if addr > address_at), default=None) else: + # if it's not a local symbol, try the globals if symbol not in self.labels: return self.global_symbol_dict.get(symbol, None) - value = self.labels.get(symbol, None) - if value is None: - return value - return value + self.base_address + # otherwise return the local symbol + return self.labels.get(symbol, None) class Instruction(ABC): @@ -218,6 +216,7 @@ class Program: name: str context: InstructionContext global_labels: Set[str] + relative_labels: Set[str] sections: List[MemorySection] base: Optional[T_AbsoluteAddress] is_loaded: bool @@ -235,6 +234,7 @@ class Program: self.context = InstructionContext() self.sections = [] self.global_labels = set() + self.relative_labels = set() self.base = base self.is_loaded = False @@ -285,18 +285,18 @@ class Program: print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, ' 'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE) - # if the program is not located anywhere explicitly in memory, add the program address - # to the defined section bases - if self.base is None: - for sec in self.sections: - sec.base += at_addr + # check if we are relocating + if self.base != at_addr: + offset = at_addr if self.base is None else at_addr - self.base - if self.base is not None and self.base != at_addr: - # move sections so they are located where they want to be located - offset = at_addr - self.base + # move all sections by the offset for sec in self.sections: sec.base += offset + # move all relative symbols by the offset + for name in self.relative_labels: + self.context.labels[name] += offset + self.base = at_addr self.context.base_address = at_addr @@ -393,9 +393,6 @@ class CPU(ABC): self.pc = 0 self.debugger_active = False - self.sections = list() - self.programs = list() - def run_instruction(self, ins: Instruction): """ Execute a single instruction @@ -446,3 +443,11 @@ class CPU(ABC): def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]: return max(self.get_loaders(), key=lambda ld: ld.can_parse(file_name)) + + @property + def sections(self): + return self.mmu.sections + + @property + def programs(self): + return self.mmu.programs \ No newline at end of file From 881f4004edee9bebf15a52ebadf166f5a9d58965 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 13 Feb 2022 19:43:44 +0100 Subject: [PATCH 12/30] fixed removed argparse line in riscemu.__init__.py --- riscemu/__main__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/riscemu/__main__.py b/riscemu/__main__.py index 21d676e..82d3d1b 100644 --- a/riscemu/__main__.py +++ b/riscemu/__main__.py @@ -72,6 +72,8 @@ if __name__ == '__main__': parser.add_argument('--interactive', help="Launch the interactive debugger instantly instead of loading any " "programs", action='store_true') + args = parser.parse_args() + # create a RunConfig from the cli args cfg_dict = dict( stack_size=args.stack_size, From 4f1c73df9e9a4f1af4d40447d8d299139c9cd0bf Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 13 Feb 2022 19:44:56 +0100 Subject: [PATCH 13/30] various small bugfixes --- riscemu/CPU.py | 2 ++ riscemu/MMU.py | 4 ++-- riscemu/__main__.py | 2 ++ riscemu/assembler.py | 3 ++- riscemu/debug.py | 7 ++++--- riscemu/helpers.py | 2 +- riscemu/parser.py | 5 +---- riscemu/priv/PrivMMU.py | 2 +- riscemu/types.py | 5 ++--- 9 files changed, 17 insertions(+), 15 deletions(-) diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 8e3c395..3b9235d 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -88,6 +88,8 @@ class UserModeCPU(CPU): while not self.halted: self.step(verbose) + print(FMT_CPU + "[CPU] Program exited with code {}".format(self.exit_code) + FMT_NONE) + def setup_stack(self, stack_size=1024 * 4) -> bool: """ Create program stack and populate stack pointer diff --git a/riscemu/MMU.py b/riscemu/MMU.py index a91ca9b..eeb75d7 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -234,10 +234,10 @@ class MMU: "\n\t".join(repr(x) for x in self.programs) ) - def context_for(self, addr: T_AbsoluteAddress) -> Optional[InstructionContext]: + def context_for(self, addr: T_AbsoluteAddress) -> InstructionContext: sec = self.get_sec_containing(addr) if sec is not None: return sec.context - return None + return InstructionContext() diff --git a/riscemu/__main__.py b/riscemu/__main__.py index 82d3d1b..96ac45e 100644 --- a/riscemu/__main__.py +++ b/riscemu/__main__.py @@ -5,6 +5,7 @@ SPDX-License-Identifier: MIT This file holds the logic for starting the emulator from the CLI """ +from riscemu import RiscemuBaseException from riscemu.CPU import UserModeCPU if __name__ == '__main__': @@ -114,6 +115,7 @@ if __name__ == '__main__': # launch the last loaded program cpu.launch(cpu.mmu.programs[-1], verbose=cfg.verbosity > 1) + except RiscemuBaseException as e: print("Error: {}".format(e.message())) e.print_stacktrace() diff --git a/riscemu/assembler.py b/riscemu/assembler.py index 869b9e4..768d504 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -175,8 +175,9 @@ class AssemblerDirectives: if content is None: content = bytearray(size) - if isinstance(context, int): + if isinstance(content, int): content = int_to_bytes(content, size, unsigned) + context.section.data += content @classmethod diff --git a/riscemu/debug.py b/riscemu/debug.py index 0885843..fd68f7b 100644 --- a/riscemu/debug.py +++ b/riscemu/debug.py @@ -11,6 +11,7 @@ from .helpers import * if typing.TYPE_CHECKING: from riscemu import CPU, Registers +HIST_FILE = os.path.join(os.path.expanduser('~'), '.riscemu_history') def launch_debug_session(cpu: 'CPU', prompt=""): @@ -78,8 +79,8 @@ def launch_debug_session(cpu: 'CPU', prompt=""): # add tab completion readline.set_completer(rlcompleter.Completer(sess_vars).complete) readline.parse_and_bind("tab: complete") - if os.path.exists('~/.riscemu_history'): - readline.read_history_file('~/.riscemu_history') + if os.path.exists(HIST_FILE): + readline.read_history_file(HIST_FILE) relaunch_debugger = False @@ -90,5 +91,5 @@ def launch_debug_session(cpu: 'CPU', prompt=""): ) finally: cpu.debugger_active = False - readline.write_history_file('~/.riscemu_history') + readline.write_history_file(HIST_FILE) diff --git a/riscemu/helpers.py b/riscemu/helpers.py index 9c94635..3048cb1 100644 --- a/riscemu/helpers.py +++ b/riscemu/helpers.py @@ -46,7 +46,7 @@ def int_from_bytes(bytes, unsigned=False) -> int: if unsigned: return num - return to_signed(num) + return to_signed(num, len(bytes)) def to_unsigned(num: int, bytes=4) -> int: diff --git a/riscemu/parser.py b/riscemu/parser.py index 711abba..f70cfa3 100644 --- a/riscemu/parser.py +++ b/riscemu/parser.py @@ -31,7 +31,7 @@ def parse_label(token: Token, args: Tuple[str], context: ParseContext): else: if name in context.context.labels: print(FMT_PARSE + 'Warn: Symbol {} defined twice!'.format(name)) - context.context.labels[name] = context.section.current_address() + context.add_label(name, context.section.current_address(), is_relative=True) PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = { @@ -53,7 +53,6 @@ def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program: for token, args in composite_tokenizer(Peekable[Token](tokens_iter)): if token.type not in PARSERS: raise ParseException("Unexpected token type: {}, {}".format(token, args)) - print('{}: {}'.format(token, args)) PARSERS[token.type](token, args, context) return context.finalize() @@ -74,8 +73,6 @@ def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, T token = next(tokens) if token.type in (TokenType.PSEUDO_OP, TokenType.LABEL, TokenType.INSTRUCTION_NAME): yield token, tuple(take_arguments(tokens)) - else: - print("skipped {}".format(token)) def take_arguments(tokens: Peekable[Token]) -> Iterable[str]: diff --git a/riscemu/priv/PrivMMU.py b/riscemu/priv/PrivMMU.py index f6a86d6..1504ab2 100644 --- a/riscemu/priv/PrivMMU.py +++ b/riscemu/priv/PrivMMU.py @@ -24,7 +24,7 @@ class PrivMMU(MMU): # calc start end end of "free" space prev_sec_end = 0 if sec_before is None else sec_before.end - next_sec_start = 0x7FFFFFFF if sec_after is None else sec_before.base + next_sec_start = 0x7FFFFFFF if sec_after is None else sec_after.base # start at the end of the prev section, or current address - 0xFFFF (aligned to 16 byte boundary) start = max(prev_sec_end, align_addr(addr - 0xFFFF, 16)) diff --git a/riscemu/types.py b/riscemu/types.py index f5c9989..0746d3d 100644 --- a/riscemu/types.py +++ b/riscemu/types.py @@ -258,11 +258,10 @@ class Program: @property def entrypoint(self): - base = 0 if self.base is None else self.base if '_start' in self.context.labels: - return base + self.context.labels.get('_start') + return self.context.labels.get('_start') if 'main' in self.context.labels: - return base + self.context.labels.get('main') + return self.context.labels.get('main') for sec in self.sections: if get_section_base_name(sec.name) == '.text' and sec.flags.executable: return sec.base From cd5795bb744869866bec72c272572e23cb89af2f Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Fri, 18 Feb 2022 10:17:12 +0100 Subject: [PATCH 14/30] fixed priv start code, added tests --- riscemu/priv/ImageLoader.py | 2 +- riscemu/priv/PrivCPU.py | 13 +++-- riscemu/priv/__main__.py | 37 +++++++------- test/end_to_end/__init__.py | 0 test/end_to_end/end_to_end_test.py | 73 ++++++++++++++++++++++++++++ test/test_isa.py | 77 ++++++++++++++++++++++++++++++ test/testcases/__main__.py | 53 ++++++++++++++++++++ test/testcases/half-loads.asm | 7 +++ test/testcases/symbols.asm | 20 ++++++++ 9 files changed, 261 insertions(+), 21 deletions(-) create mode 100644 test/end_to_end/__init__.py create mode 100644 test/end_to_end/end_to_end_test.py create mode 100644 test/test_isa.py create mode 100644 test/testcases/__main__.py create mode 100644 test/testcases/half-loads.asm create mode 100644 test/testcases/symbols.asm diff --git a/riscemu/priv/ImageLoader.py b/riscemu/priv/ImageLoader.py index b711568..9ef86e6 100644 --- a/riscemu/priv/ImageLoader.py +++ b/riscemu/priv/ImageLoader.py @@ -17,7 +17,7 @@ class MemoryImageLoader(ProgramLoader): @classmethod def can_parse(cls, source_path: str) -> float: - if source_path.split('.')[-1] == '.img': + if source_path.split('.')[-1] == 'img': return 1 return 0 diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index a6d9c5a..483300e 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -11,6 +11,7 @@ from .CSR import CSR from .ElfLoader import ElfBinaryFileLoader from .Exceptions import * from .ImageLoader import MemoryImageLoader +from .PrivMMU import PrivMMU from .PrivRV32I import PrivRV32I from .privmodes import PrivModes from ..instructions import RV32A, RV32M @@ -45,7 +46,7 @@ class PrivCPU(CPU): """ def __init__(self, conf): - super().__init__(MMU(), [PrivRV32I, RV32M, RV32A], conf) + super().__init__(PrivMMU(), [PrivRV32I, RV32M, RV32A], conf) # start in machine mode self.mode: PrivModes = PrivModes.MACHINE @@ -90,11 +91,17 @@ class PrivCPU(CPU): print() print(FMT_CPU + "[CPU] System stopped without halting - perhaps you stopped the debugger?" + FMT_NONE) - def launch(self, program: Program, verbose: bool = False): + def launch(self, program: Optional[Program] = None, verbose: bool = False): print(FMT_CPU + '[CPU] Started running from 0x{:08X} ({})'.format(self.pc, "kernel") + FMT_NONE) self._time_start = time.perf_counter_ns() // self.TIME_RESOLUTION_NS + self.run(self.conf.verbosity > 1 or verbose) + def load_program(self, program: Program): + if program.name == 'kernel': + self.pc = program.entrypoint + super().load_program(program) + def _init_csr(self): # set up CSR self.csr = CSR() @@ -230,4 +237,4 @@ class PrivCPU(CPU): def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]: return [ AssemblyFileLoader, MemoryImageLoader, ElfBinaryFileLoader - ] \ No newline at end of file + ] diff --git a/riscemu/priv/__main__.py b/riscemu/priv/__main__.py index 2363de4..bbdd1fb 100644 --- a/riscemu/priv/__main__.py +++ b/riscemu/priv/__main__.py @@ -1,3 +1,5 @@ +from riscemu import RunConfig +from riscemu.types import Program from .PrivCPU import PrivCPU from .ElfLoader import ElfBinaryFileLoader from .ImageLoader import MemoryImageLoader @@ -9,26 +11,27 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description='RISC-V privileged architecture emulator', prog='riscemu') - parser.add_argument('--kernel', type=str, help='Kernel elf loaded with user programs', nargs='?') - parser.add_argument('--image', type=str, help='Memory image containing kernel', nargs='?') - parser.add_argument('--debug-exceptions', help='Launch the interactive debugger when an exception is generated', action='store_true') + parser.add_argument('source', type=str, + help='Compiled RISC-V ELF file or memory image containing compiled RISC-V ELF files', nargs='+') + parser.add_argument('--debug-exceptions', help='Launch the interactive debugger when an exception is generated', + action='store_true') - parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count', default=0) + parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count', + default=0) args = parser.parse_args() - mmu = None - - if args.kernel is not None: - mmu = LoadedElfMMU(ElfExecutable(args.kernel)) - elif args.image is not None: - mmu = MemoryImageMMU(args.image) - - if mmu is None: - print("You must specify one of --kernel or --image for running in privilege mode!") - sys.exit(1) - - cpu = PrivCPU(RunConfig(verbosity=args.verbose, debug_on_exception=args.debug_exceptions), mmu) - cpu.run() + cpu = PrivCPU(RunConfig(verbosity=args.verbose, debug_on_exception=args.debug_exceptions)) + for source_path in args.source: + loader = max((loader for loader in cpu.get_loaders()), key=lambda l: l.can_parse(source_path)) + argv, opts = loader.get_options(sys.argv) + program = loader.instantiate(source_path, opts).parse() + if isinstance(program, Program): + cpu.load_program(program) + else: + program_iter = program + for program in program_iter: + cpu.load_program(program) + cpu.launch() diff --git a/test/end_to_end/__init__.py b/test/end_to_end/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/end_to_end/end_to_end_test.py b/test/end_to_end/end_to_end_test.py new file mode 100644 index 0000000..c1a4e90 --- /dev/null +++ b/test/end_to_end/end_to_end_test.py @@ -0,0 +1,73 @@ +import contextlib +import os +from abc import abstractmethod +from tempfile import NamedTemporaryFile +from typing import Optional, Union, Tuple +from unittest import TestCase + +from riscemu import CPU, UserModeCPU, InstructionSetDict, RunConfig +from riscemu.types import Program + + +class EndToEndTest(TestCase): + + def __init__(self, cpu: Optional[CPU] = None): + super().__init__() + if cpu is None: + cpu = UserModeCPU(InstructionSetDict.values(), RunConfig()) + self.cpu = cpu + + @abstractmethod + def get_source(self) -> Tuple[str, Union[bytes, str, bytearray]]: + """ + This method returns the source code of the program + :return: + """ + pass + + def test_run_program(self): + """ + Runs the program and verifies output + :return: + """ + with self.with_source_file() as names: + fname, orig_name = names + loader = self.cpu.get_best_loader_for(fname) + self.program = loader.instantiate(fname, loader.get_options([])).parse() + self._change_program_file_name(self.program, orig_name) + self.cpu.load_program(self.program) + self.after_program_load(self.program) + if isinstance(self.cpu, UserModeCPU): + self.cpu.setup_stack() + try: + self.cpu.launch(self.program) + except Exception as ex: + if self.is_exception_expected(ex): + pass + raise ex + + @contextlib.contextmanager + def with_source_file(self): + name, content = self.get_source() + if isinstance(content, str): + f = NamedTemporaryFile('w', suffix=name, delete=False) + else: + f = NamedTemporaryFile('wb', suffix=name, delete=False) + f.write(content) + f.flush() + f.close() + try: + yield f.name, name + finally: + os.unlink(f.name) + + def after_program_load(self, program): + pass + + def is_exception_expected(self, ex: Exception) -> bool: + return False + + def _change_program_file_name(self, program: Program, new_name: str): + program.name = new_name + for sec in program.sections: + sec.owner = new_name diff --git a/test/test_isa.py b/test/test_isa.py new file mode 100644 index 0000000..80a7a13 --- /dev/null +++ b/test/test_isa.py @@ -0,0 +1,77 @@ +from riscemu.colors import FMT_ERROR, FMT_NONE, FMT_BOLD, FMT_GREEN +from riscemu.exceptions import ASSERT_LEN +from riscemu.helpers import int_from_bytes +from riscemu.instructions import InstructionSet +from riscemu.types import Instruction, CPU +from riscemu.decoder import RISCV_REGS + +FMT_SUCCESS = FMT_GREEN + FMT_BOLD + + +def assert_equals(ins: Instruction, cpu: CPU): + a, b = (get_arg_from_ins(ins, i, cpu) for i in (0, 2)) + return a == b + + +def assert_equals_mem(ins: Instruction, cpu: CPU): + a, b = (get_arg_from_ins(ins, i, cpu) for i in (0, 2)) + a = cpu.mmu.read_int(a) + return a == b + + +def assert_in(ins: Instruction, cpu: CPU): + a = get_arg_from_ins(ins, 0, cpu) + others = [get_arg_from_ins(ins, i, cpu) for i in range(2, len(ins.args))] + return a in others + + +def _not(func): + def test(ins: Instruction, cpu: CPU): + return not func(ins, cpu) + + return test + + +def get_arg_from_ins(ins: Instruction, num: int, cpu: CPU): + a = ins.args[num] + if a in RISCV_REGS: + return cpu.regs.get(a) + return ins.get_imm(num) + + +assert_ops = { + '==': assert_equals, + '!=': _not(assert_equals), + 'in': assert_in, + 'not_in': _not(assert_in), +} + + +class TestIS(InstructionSet): + def __init__(self, cpu: 'CPU'): + print('[Test] loading testing ISA, this is only meant for running testcases and is not part of the RISC-V ISA!') + self.failed = False + super().__init__(cpu) + + def instruction_assert(self, ins: Instruction): + if len(ins.args) < 3: + print(FMT_ERROR + '[Test] Unknown assert statement: {}'.format(ins) + FMT_NONE) + return + op = ins.args[1] + if op not in assert_ops: + print(FMT_ERROR + '[Test] Unknown operation statement: {} in {}'.format(op, ins) + FMT_NONE) + return + + if assert_ops[op](ins, self.cpu): + print(FMT_SUCCESS + '[TestCase] 🟢 passed assertion {}'.format(ins)) + else: + print(FMT_ERROR + '[TestCase] 🔴 failed assertion {}'.format(ins)) + self.cpu.halted = True + self.failed = True + + def instruction_fail(self, ins: Instruction): + print(FMT_ERROR + '[TestCase] 🔴 reached fail instruction! {}'.format(ins)) + self.cpu.halted = True + self.failed = True + + def assert_mem(self, ins: Instruction): \ No newline at end of file diff --git a/test/testcases/__main__.py b/test/testcases/__main__.py new file mode 100644 index 0000000..2b8fd43 --- /dev/null +++ b/test/testcases/__main__.py @@ -0,0 +1,53 @@ +from riscemu import AssemblyFileLoader +from riscemu.colors import * + +FMT_SUCCESS = FMT_GREEN + FMT_BOLD + +def run_test(path: str): + from riscemu import CPU, UserModeCPU, RunConfig + from riscemu.instructions import InstructionSetDict + from test.test_isa import TestIS + import os + + fname = os.path.basename(path) + + ISAs = list(InstructionSetDict.values()) + ISAs.append(TestIS) + + cpu = UserModeCPU(ISAs, RunConfig()) + try: + program = AssemblyFileLoader(path, {}).parse() + cpu.load_program(program) + cpu.launch(program) + except Exception as ex: + print(FMT_ERROR + '[Test] 🔴 failed with exception "{}" ({})'.format(ex, fname) + FMT_NONE) + raise ex + + if cpu.halted: + for isa in cpu.instruction_sets: + if isinstance(isa, TestIS): + if not isa.failed: + print(FMT_SUCCESS + '[Test] 🟢 successful {}'.format(fname) + FMT_NONE) + return not isa.failed + return False + + +if __name__ == '__main__': + + import os + import glob + + successes = 0 + failures = 0 + ttl = 0 + + for path in glob.glob(f'{os.path.dirname(__file__)}/*.asm'): + print(FMT_BLUE + '[Test] running testcase ' + os.path.basename(path) + FMT_NONE) + ttl += 1 + if run_test(path): + successes += 1 + else: + failures += 1 + + + diff --git a/test/testcases/half-loads.asm b/test/testcases/half-loads.asm new file mode 100644 index 0000000..f0a2ce5 --- /dev/null +++ b/test/testcases/half-loads.asm @@ -0,0 +1,7 @@ +.data + +data: +.word 0xFFFFFFFF, 0x0000FFFF, 0xFF00FF00, 0x7FFFFFFF + +.text + ebreak diff --git a/test/testcases/symbols.asm b/test/testcases/symbols.asm new file mode 100644 index 0000000..107ab45 --- /dev/null +++ b/test/testcases/symbols.asm @@ -0,0 +1,20 @@ +.text + +main: + addi a0, zero, main + addi a1, zero, main + addi t0, zero, 1000 + assert a0, ==, 0x100 +1: + addi a1, a1, 1 + blt a1, t0, 1b + sub a1, a1, a0 + j 1f + addi a1, zero, 0 + fail +1: + assert a1, ==, 744 + add a0, zero, a1 ; set exit code to a1 + addi a7, zero, SCALL_EXIT ; exit syscall code + scall + fail \ No newline at end of file From 71093fe72f85a4ce207df7ff45d5549f584ffe65 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 15:21:10 +0200 Subject: [PATCH 15/30] Maor round of bugfixes and incremental improvements - fixed errors in TextIO and IOModule - moved to Int32 and UInt32 based arithmetic - added a lot of end-to-end and other tests --- .idea/riscemu.iml | 3 + riscemu/CPU.py | 4 +- riscemu/IO/IOModule.py | 30 ++-- riscemu/IO/TextIO.py | 72 ++------ riscemu/MMU.py | 68 ++++++-- riscemu/__init__.py | 2 +- riscemu/assembler.py | 13 +- riscemu/decoder/formatter.py | 2 +- riscemu/helpers.py | 42 +---- riscemu/instructions/RV32A.py | 48 +++--- riscemu/instructions/RV32I.py | 57 +++---- riscemu/instructions/instruction_set.py | 37 ++-- riscemu/priv/CSR.py | 28 +-- riscemu/priv/ElfLoader.py | 4 +- riscemu/priv/Exceptions.py | 5 +- riscemu/priv/ImageLoader.py | 10 +- riscemu/priv/PrivCPU.py | 28 +-- riscemu/priv/PrivRV32I.py | 33 ++-- riscemu/priv/__main__.py | 2 +- riscemu/priv/types.py | 16 +- riscemu/registers.py | 18 +- riscemu/types.py | 216 +++++++++++++++++++++++- setup.py | 2 +- test/__init__.py | 3 +- test/test_helpers.py | 23 --- test/test_integers.py | 19 +++ test/test_isa.py | 2 - 27 files changed, 486 insertions(+), 301 deletions(-) create mode 100644 test/test_integers.py diff --git a/.idea/riscemu.iml b/.idea/riscemu.iml index 8ed6672..71b6faa 100644 --- a/.idea/riscemu.iml +++ b/.idea/riscemu.iml @@ -4,6 +4,9 @@ + + + diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 3b9235d..2fcedd0 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -17,7 +17,7 @@ from .colors import FMT_CPU, FMT_NONE from .debug import launch_debug_session from .exceptions import RiscemuBaseException, LaunchDebuggerException from .syscall import SyscallInterface, get_syscall_symbols -from .types import CPU, ProgramLoader +from .types import CPU, ProgramLoader, Int32 from .parser import AssemblyFileLoader if typing.TYPE_CHECKING: @@ -107,7 +107,7 @@ class UserModeCPU(CPU): if not self.mmu.load_section(stack_sec, fixed_position=False): return False - self.regs.set('sp', stack_sec.base + stack_sec.size) + self.regs.set('sp', Int32(stack_sec.base + stack_sec.size)) return True @classmethod diff --git a/riscemu/IO/IOModule.py b/riscemu/IO/IOModule.py index 21d6a97..521ae93 100644 --- a/riscemu/IO/IOModule.py +++ b/riscemu/IO/IOModule.py @@ -1,22 +1,22 @@ from abc import ABC, abstractmethod +from typing import Optional +from riscemu.types import MemorySection, MemoryFlags, T_RelativeAddress -class IOModule(ABC): - addr: int - size: int - def __init__(self, addr: int, size: int): - self.addr = addr - self.size = size +class IOModule(MemorySection, ABC): + def __init__(self, name: str, flags: MemoryFlags, size: int, owner: str = 'system', base: int = 0): + super(IOModule, self).__init__(name, flags, size, base, owner, None) - @abstractmethod - def read(self, addr: int, size: int): - pass + def contains(self, addr, size: int = 0): + return self.base <= addr < self.base + self.size and \ + self.base <= addr + size <= self.base + self.size - @abstractmethod - def write(self, addr: int, data: bytearray, size: int): - pass + def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex', + bytes_per_row: int = 16, rows: int = 10, group: int = 4): + print(self) - def contains(self, addr, size: int = 0): - return self.addr <= addr < self.addr + self.size and \ - self.addr <= addr + size <= self.addr + self.size + def __repr__(self): + return "{}[{}] at 0x{:0X} (size={}bytes, flags={})".format( + self.__class__.__name__, self.name, self.base, self.size, self.flags + ) \ No newline at end of file diff --git a/riscemu/IO/TextIO.py b/riscemu/IO/TextIO.py index 1a615e0..a57e1ab 100644 --- a/riscemu/IO/TextIO.py +++ b/riscemu/IO/TextIO.py @@ -1,70 +1,28 @@ from .IOModule import IOModule from ..priv.Exceptions import InstructionAccessFault -from ..helpers import int_from_bytes -from threading import Thread -import time +from ..types import T_RelativeAddress, Instruction, MemoryFlags, Int32 -def _window_loop(textIO: 'TextIO'): - try: - import PySimpleGUI as sg - - logs = sg.Text(font="monospace") - col = sg.Column([[logs]], size=(640, 400), scrollable=True) - window = sg.Window("TextIO:{:x}".format(textIO.addr), [[col]]) - lines = list() - - window.finalize() - textIO.set_sg_window(window) - while True: - e, v = window.read() - if e == sg.WINDOW_CLOSED: - window.close() - textIO.set_sg_window(None) - break - if e == 'putlog': - lines.insert(0, v[0]) - logs.update(value='\n'.join(lines) + '\n') - col.contents_changed() - - except ImportError: - print("[TextIO] window disabled - please install PySimpleGui!") - textIO.set_sg_window(None) - class TextIO(IOModule): - def __init__(self, addr: int, buflen: int = 128): - super(TextIO, self).__init__(addr, buflen + 4) + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + raise InstructionAccessFault(self.base + offset) + + def __init__(self, base: int, buflen: int = 128): + super(TextIO, self).__init__('TextIO', MemoryFlags(False, False), buflen + 4, base=base) self.buff = bytearray(buflen) self.current_line = "" - self.sg_window = None - self.start_buffer = list() - - self.thread = Thread(target=_window_loop, args=(self,)) - self.thread.start() - time.sleep(0.1) - - def set_sg_window(self, window): - if self.sg_window is not None and window is not None: - raise Exception("cannot set window twice!") - self.sg_window = window - - buff = self.start_buffer - self.start_buffer = None if window is None else list() - - for line in buff: - self._present(line) def read(self, addr: int, size: int) -> bytearray: - raise InstructionAccessFault(addr) + raise InstructionAccessFault(self.base + addr) - def write(self, addr: int, data: bytearray, size: int): - if addr == self.addr: + def write(self, addr: int, size: int, data: bytearray): + if addr == 0: if size > 4: raise InstructionAccessFault(addr) - if int_from_bytes(data[0:4]) > 0: + if Int32(data) != 0: self._print() return - buff_start = addr - self.addr - 4 + buff_start = addr - 4 self.buff[buff_start:buff_start + size] = data[0:size] def _print(self): @@ -83,10 +41,4 @@ class TextIO(IOModule): self.current_line += text def _present(self, text: str): - if self.sg_window is not None: - self.sg_window.write_event_value('putlog', text) - elif self.start_buffer is not None: - self.start_buffer.append(text) - else: - print("[TextIO:{:x}] {}".format(self.addr, text)) - + print("[TextIO:{:x}] {}".format(self.base, text)) diff --git a/riscemu/MMU.py b/riscemu/MMU.py index eeb75d7..fdbf822 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -4,13 +4,13 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from .colors import * from .exceptions import InvalidAllocationException, MemoryAccessException -from .helpers import align_addr, int_from_bytes +from .helpers import align_addr from .types import Instruction, MemorySection, MemoryFlags, T_AbsoluteAddress, \ - Program, InstructionContext + Program, InstructionContext, Int32 class MMU: @@ -85,7 +85,7 @@ class MMU: raise RuntimeError("No next instruction available!") return sec.read_ins(addr - sec.base) - def read(self, addr: int, size: int) -> bytearray: + def read(self, addr: Union[int, Int32], size: int) -> bytearray: """ Read size bytes of memory at addr @@ -93,13 +93,16 @@ class MMU: :param size: The number of bytes to read :return: The bytearray at addr """ + if isinstance(addr, Int32): + breakpoint() + addr = addr.unsigned_value sec = self.get_sec_containing(addr) if sec is None: print(FMT_MEM + "[MMU] Trying to read data form invalid region at 0x{:x}! ".format(addr) + FMT_NONE) raise MemoryAccessException("region is non-initialized!", addr, size, 'read') return sec.read(addr - sec.base, size) - def write(self, addr: int, size: int, data): + def write(self, addr: int, size: int, data: bytearray): """ Write bytes into memory @@ -137,32 +140,51 @@ class MMU: print(FMT_MEM + "[MMU] Lookup for symbol {}:".format(symb) + FMT_NONE) if symb in self.global_symbols: print(" Found global symbol {}: 0x{:X}".format(symb, self.global_symbols[symb])) - for section in self.sections: - if symb in section.context.labels: - print(" Found local labels {}: 0x{:X} in {}".format(symb, section.context.labels[symb], section.name)) + for bin in self.programs: + if symb in bin.context.labels: + print(" Found local labels {}: 0x{:X} in {}".format(symb, bin.context.labels[symb], bin.name)) - def read_int(self, addr: int) -> int: - return int_from_bytes(self.read(addr, 4)) + def read_int(self, addr: int) -> Int32: + return Int32(self.read(addr, 4)) def translate_address(self, address: T_AbsoluteAddress) -> str: - # FIXME: proper implementation using the debug info - return str(address) + sec = self.get_sec_containing(address) + if not sec: + return "unknown at 0x{:0x}".format(address) + + bin = self.get_bin_containing(address) + secs = set(sec.name for sec in bin.sections) if bin else [] + + def key(x): + name, val = x + + if name in secs or val > address: + return float('inf') + return address - val + + name, val = min(sec.context.labels.items(), key=key, default=('.empty', None)) + if val is None: + return "unknown at 0x{:0x}".format(address) + + return str('{}:{} at {} (0x{:0x}) + 0x{:0x}'.format( + sec.owner, sec.name, name, val, address - val + )) def has_continous_free_region(self, start: int, end: int) -> bool: # if we have no sections we are all good if len(self.sections) == 0: return True # if the last section is located before the start we are also good - if start > self.sections[-1].base + self.sections[-1].size: + if start >= self.sections[-1].base + self.sections[-1].size: return True for sec in self.sections: # skip all sections that end before the required start point - if sec.base + sec.size < start: + if sec.base + sec.size <= start: continue # we now have the first section that doesn't end **before** the start point # if this section starts after the specified end, we are good - if sec.base > end: + if sec.base >= end: return True # otherwise we can't continue return False @@ -230,7 +252,8 @@ class MMU: return self.sections[-1].base + self.sections[-1].size def __repr__(self): - return "MMU(\n\t{}\n)".format( + return "{}(\n\t{}\n)".format( + self.__class__.__name__, "\n\t".join(repr(x) for x in self.programs) ) @@ -241,3 +264,16 @@ class MMU: return sec.context return InstructionContext() + + def report_addr(self, addr: T_AbsoluteAddress): + sec = self.get_sec_containing(addr) + if not sec: + print("addr is in no section!") + return + owner = [b for b in self.programs if b.name == sec.owner] + if owner: + print("owned by: {}".format(owner[0])) + + + print("{}: 0x{:0x} + 0x{:0x}".format(name, val, addr - val)) + diff --git a/riscemu/__init__.py b/riscemu/__init__.py index 22d1f8b..6c39581 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -25,4 +25,4 @@ from .parser import tokenize, parse_tokens, AssemblyFileLoader __author__ = "Anton Lydike " __copyright__ = "Copyright 2021 Anton Lydike" -__version__ = '1.0.0' \ No newline at end of file +__version__ = '2.0.0a1' diff --git a/riscemu/assembler.py b/riscemu/assembler.py index 768d504..8e0fca0 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -1,13 +1,13 @@ -from typing import Optional, Tuple, Union, List from enum import Enum, auto +from typing import List from typing import Optional, Tuple, Union -from .helpers import parse_numeric_argument, align_addr, int_to_bytes, get_section_base_name -from .types import Program, T_RelativeAddress, InstructionContext, Instruction +from .base import BinaryDataMemorySection, InstructionMemorySection from .colors import FMT_PARSE, FMT_NONE -from .exceptions import ParseException, ASSERT_LEN, ASSERT_NOT_NULL +from .exceptions import ParseException, ASSERT_LEN +from .helpers import parse_numeric_argument, align_addr, get_section_base_name from .tokenizer import Token -from .base import BinaryDataMemorySection, InstructionMemorySection +from .types import Program, T_RelativeAddress, InstructionContext, Instruction, UInt32, Int32 INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini') """ @@ -96,7 +96,6 @@ class ParseContext: if is_relative: self.program.relative_labels.add(name) - def __repr__(self): return "{}(\n\tsetion={},\n\tprogram={}\n)".format( self.__class__.__name__, self.section, self.program @@ -176,7 +175,7 @@ class AssemblerDirectives: if content is None: content = bytearray(size) if isinstance(content, int): - content = int_to_bytes(content, size, unsigned) + content = bytearray(content.to_bytes(size, 'little', signed=not unsigned)) context.section.data += content diff --git a/riscemu/decoder/formatter.py b/riscemu/decoder/formatter.py index c1c7955..0d7d304 100644 --- a/riscemu/decoder/formatter.py +++ b/riscemu/decoder/formatter.py @@ -24,7 +24,7 @@ def format_ins(ins: int, name: str, fmt: str = 'int'): return name if opcode in (0x8, 0x0): r1, r2, imm = decoder(ins) - return f"{name:<7} {r1}, {imm}({r2})" + return f"{name:<7} {RISCV_REGS[r1]}, {imm}({RISCV_REGS[r2]})" elif decoder in (decode_i, decode_i_unsigned, decode_b, decode_i_shamt, decode_s): r1, r2, imm = decoder(ins) r1, r2 = RISCV_REGS[r1], RISCV_REGS[r2] diff --git a/riscemu/helpers.py b/riscemu/helpers.py index 3048cb1..82774d1 100644 --- a/riscemu/helpers.py +++ b/riscemu/helpers.py @@ -5,9 +5,11 @@ SPDX-License-Identifier: MIT """ from math import log10, ceil -from .exceptions import * from typing import Iterable, Iterator, TypeVar, Generic, List, Optional +from .exceptions import * +import types + def align_addr(addr: int, to_bytes: int = 8) -> int: """ @@ -28,40 +30,6 @@ def parse_numeric_argument(arg: str) -> int: raise ParseException('Invalid immediate argument \"{}\", maybe missing symbol?'.format(arg), (arg, ex)) -def int_to_bytes(val, bytes=4, unsigned=False) -> bytearray: - """ - int -> byte (two's complement) - """ - if unsigned and val < 0: - raise NumberFormatException("unsigned negative number!") - return bytearray(to_unsigned(val, bytes).to_bytes(bytes, 'little')) - - -def int_from_bytes(bytes, unsigned=False) -> int: - """ - byte -> int (two's complement) - """ - num = int.from_bytes(bytes, 'little') - - if unsigned: - return num - - return to_signed(num, len(bytes)) - - -def to_unsigned(num: int, bytes=4) -> int: - if num < 0: - return (2 ** (bytes * 8)) + num - return num - - -def to_signed(num: int, bytes=4) -> int: - if num >> (bytes * 8 - 1): - return num - 2 ** (8 * bytes) - return num - - - def create_chunks(my_list, chunk_size): """Split a list like [a,b,c,d,e,f,g,h,i,j,k,l,m] into e.g. [[a,b,c,d],[e,f,g,h],[i,j,k,l],[m]]""" return [my_list[i:i + chunk_size] for i in range(0, len(my_list), chunk_size)] @@ -87,10 +55,10 @@ def format_bytes(byte_arr: bytearray, fmt: str, group: int = 1, highlight: int = return highlight_in_list(['0x{}'.format(ch.hex()) for ch in chunks], highlight) if fmt == 'int': spc = str(ceil(log10(2 ** (group * 8 - 1))) + 1) - return highlight_in_list([('{:0' + spc + 'd}').format(int_from_bytes(ch)) for ch in chunks], highlight) + return highlight_in_list([('{:0' + spc + 'd}').format(types.Int32(ch)) for ch in chunks], highlight) if fmt == 'uint': spc = str(ceil(log10(2 ** (group * 8)))) - return highlight_in_list([('{:0' + spc + 'd}').format(int_from_bytes(ch, unsigned=True)) for ch in chunks], + return highlight_in_list([('{:0' + spc + 'd}').format(types.UInt32(ch)) for ch in chunks], highlight) if fmt == 'ascii': return "".join(repr(chr(b))[1:-1] for b in byte_arr) diff --git a/riscemu/instructions/RV32A.py b/riscemu/instructions/RV32A.py index 44c3f32..c7f7c15 100644 --- a/riscemu/instructions/RV32A.py +++ b/riscemu/instructions/RV32A.py @@ -1,6 +1,6 @@ from .instruction_set import InstructionSet, Instruction from ..exceptions import INS_NOT_IMPLEMENTED -from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed +from ..types import Int32, UInt32 class RV32A(InstructionSet): @@ -19,60 +19,60 @@ class RV32A(InstructionSet): def instruction_amoswap_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) if dest == 'zero': - self.mmu.write(addr, int_to_bytes(addr, 4)) + self.mmu.write(addr, val.to_bytes()) else: - old = int_from_bytes(self.mmu.read(addr, 4)) - self.mmu.write(addr, int_to_bytes(val, 4)) + old = Int32(self.mmu.read(addr, 4)) + self.mmu.write(addr, val.to_bytes()) self.regs.set(dest, old) def instruction_amoadd_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) - old = int_from_bytes(self.mmu.read(addr, 4)) - self.mmu.write(addr, int_to_bytes(old + val, 4)) + old = Int32(self.mmu.read(addr, 4)) + self.mmu.write(addr, (old + val).to_bytes(4)) self.regs.set(dest, old) def instruction_amoand_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) - old = int_from_bytes(self.mmu.read(addr, 4)) - self.mmu.write(addr, int_to_bytes(old & val, 4)) + old = Int32(self.mmu.read(addr, 4)) + self.mmu.write(addr, (old & val).to_bytes(4)) self.regs.set(dest, old) def instruction_amoor_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) - old = int_from_bytes(self.mmu.read(addr, 4)) - self.mmu.write(addr, int_to_bytes(old | val, 4)) + old = Int32(self.mmu.read(addr, 4)) + self.mmu.write(addr, (old | val).to_bytes(4)) self.regs.set(dest, old) def instruction_amoxor_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) - old = int_from_bytes(self.mmu.read(addr, 4)) - self.mmu.write(addr, int_to_bytes(old ^ val, 4)) + old = Int32(self.mmu.read(addr, 4)) + self.mmu.write(addr, (old ^ val).to_bytes(4)) self.regs.set(dest, old) def instruction_amomax_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) - old = int_from_bytes(self.mmu.read(addr, 4)) - self.mmu.write(addr, int_to_bytes(max(old, val), 4)) + old = Int32(self.mmu.read(addr, 4)) + self.mmu.write(addr, max(old, val).to_bytes(4)) self.regs.set(dest, old) def instruction_amomaxu_w(self, ins: 'Instruction'): - dest, addr, val = self.parse_rd_rs_rs(ins) - val = to_unsigned(val) - old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True) + val: UInt32 + dest, addr, val = self.parse_rd_rs_rs(ins, signed=False) + old = UInt32(self.mmu.read(addr, 4)) - self.mmu.write(addr, int_to_bytes(to_signed(max(old, val)), 4)) + self.mmu.write(addr, max(old, val).to_bytes()) self.regs.set(dest, old) def instruction_amomin_w(self, ins: 'Instruction'): dest, addr, val = self.parse_rd_rs_rs(ins) - old = int_from_bytes(self.mmu.read(addr, 4)) - self.mmu.write(addr, int_to_bytes(min(old, val), 4)) + old = Int32(self.mmu.read(addr, 4)) + self.mmu.write(addr, min(old, val).to_bytes(4)) self.regs.set(dest, old) def instruction_amominu_w(self, ins: 'Instruction'): - dest, addr, val = self.parse_rd_rs_rs(ins) - val = to_unsigned(val) - old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True) + val: UInt32 + dest, addr, val = self.parse_rd_rs_rs(ins, signed=False) + old = UInt32(self.mmu.read(addr, 4)) - self.mmu.write(addr, int_to_bytes(to_signed(min(old, val)), 4)) + self.mmu.write(addr, min(old, val).to_bytes(4)) self.regs.set(dest, old) diff --git a/riscemu/instructions/RV32I.py b/riscemu/instructions/RV32I.py index 291ccbe..26d0bd9 100644 --- a/riscemu/instructions/RV32I.py +++ b/riscemu/instructions/RV32I.py @@ -7,12 +7,11 @@ SPDX-License-Identifier: MIT from .instruction_set import * from ..CPU import UserModeCPU -from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed from ..colors import FMT_DEBUG, FMT_NONE from ..debug import launch_debug_session from ..exceptions import LaunchDebuggerException from ..syscall import Syscall -from ..types import Instruction +from ..types import Instruction, Int32, UInt32 class RV32I(InstructionSet): @@ -26,35 +25,35 @@ class RV32I(InstructionSet): def instruction_lb(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1))) + self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 1))) def instruction_lh(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2))) + self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 2))) def instruction_lw(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 4))) + self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 4))) def instruction_lbu(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1), unsigned=True)) + self.regs.set(rd, UInt32(self.mmu.read(addr.unsigned_value, 1))) def instruction_lhu(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2), unsigned=True)) + self.regs.set(rd, UInt32(self.mmu.read(addr.unsigned_value, 2))) def instruction_sb(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.mmu.write(addr, 1, int_to_bytes(self.regs.get(rd), 1)) + self.mmu.write(addr.value, 1, self.regs.get(rd).to_bytes(1)) def instruction_sh(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.mmu.write(addr, 2, int_to_bytes(self.regs.get(rd), 2)) + self.mmu.write(addr.value, 2, self.regs.get(rd).to_bytes(2)) def instruction_sw(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.mmu.write(addr, 4, int_to_bytes(self.regs.get(rd), 4)) + self.mmu.write(addr.value, 4, self.regs.get(rd).to_bytes(4)) def instruction_sll(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) @@ -63,7 +62,7 @@ class RV32I(InstructionSet): src2 = ins.get_reg(2) self.regs.set( dst, - to_signed(to_unsigned(self.regs.get(src1)) << (self.regs.get(src2) & 0b11111)) + self.regs.get(src1) << (self.regs.get(src2) & 0b11111) ) def instruction_slli(self, ins: 'Instruction'): @@ -73,7 +72,7 @@ class RV32I(InstructionSet): imm = ins.get_imm(2) self.regs.set( dst, - to_signed(to_unsigned(self.regs.get(src1)) << (imm & 0b11111)) + self.regs.get(src1) << (imm & 0b11111) ) def instruction_srl(self, ins: 'Instruction'): @@ -83,7 +82,7 @@ class RV32I(InstructionSet): src2 = ins.get_reg(2) self.regs.set( dst, - to_signed(to_unsigned(self.regs.get(src1)) >> (self.regs.get(src2) & 0b11111)) + self.regs.get(src1).shift_right_logical(self.regs.get(src2) & 0b11111) ) def instruction_srli(self, ins: 'Instruction'): @@ -93,7 +92,7 @@ class RV32I(InstructionSet): imm = ins.get_imm(2) self.regs.set( dst, - to_signed(to_unsigned(self.regs.get(src1)) >> (imm & 0b11111)) + self.regs.get(src1).shift_right_logical(imm & 0b11111) ) def instruction_sra(self, ins: 'Instruction'): @@ -142,14 +141,14 @@ class RV32I(InstructionSet): def instruction_lui(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) - imm = ins.get_imm(1) - self.regs.set(reg, imm << 12) + imm = UInt32(ins.get_imm(1)) << 12 + self.regs.set(reg, Int32(imm)) def instruction_auipc(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) - imm = to_unsigned(ins.get_imm(1)) - self.regs.set(reg, self.pc + (imm << 12)) + imm = UInt32(ins.get_imm(1) << 12) + self.regs.set(reg, imm.signed() + self.pc) def instruction_xor(self, ins: 'Instruction'): rd, rs1, rs2 = self.parse_rd_rs_rs(ins) @@ -197,59 +196,59 @@ class RV32I(InstructionSet): rd, rs1, rs2 = self.parse_rd_rs_rs(ins) self.regs.set( rd, - int(rs1 < rs2) + Int32(int(rs1 < rs2)) ) def instruction_slti(self, ins: 'Instruction'): rd, rs1, imm = self.parse_rd_rs_imm(ins) self.regs.set( rd, - int(rs1 < imm) + Int32(int(rs1 < imm)) ) def instruction_sltu(self, ins: 'Instruction'): dst, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False) self.regs.set( dst, - int(rs1 < rs2) + Int32(int(rs1 < rs2)) ) def instruction_sltiu(self, ins: 'Instruction'): dst, rs1, imm = self.parse_rd_rs_imm(ins, signed=False) self.regs.set( dst, - int(rs1 < imm) + Int32(int(rs1 < imm)) ) def instruction_beq(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 == rs2: - self.pc = dst + self.pc = dst.unsigned_value def instruction_bne(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 != rs2: - self.pc = dst + self.pc = dst.unsigned_value def instruction_blt(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 < rs2: - self.pc = dst + self.pc = dst.unsigned_value def instruction_bge(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 >= rs2: - self.pc = dst + self.pc = dst.unsigned_value def instruction_bltu(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False) if rs1 < rs2: - self.pc = dst + self.pc = dst.unsigned_value def instruction_bgeu(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False) if rs1 >= rs2: - self.pc = dst + self.pc = dst.unsigned_value # technically deprecated def instruction_j(self, ins: 'Instruction'): @@ -277,7 +276,7 @@ class RV32I(InstructionSet): def instruction_ret(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 0) - self.pc = self.regs.get('ra') + self.pc = self.regs.get('ra').value def instruction_ecall(self, ins: 'Instruction'): self.instruction_scall(ins) diff --git a/riscemu/instructions/instruction_set.py b/riscemu/instructions/instruction_set.py index 8b277c6..e0d3f06 100644 --- a/riscemu/instructions/instruction_set.py +++ b/riscemu/instructions/instruction_set.py @@ -8,9 +8,8 @@ from typing import Tuple, Callable, Dict from abc import ABC from ..CPU import CPU -from ..helpers import to_unsigned from ..exceptions import ASSERT_LEN, ASSERT_IN -from ..types import Instruction +from ..types import Instruction, Int32, UInt32 class InstructionSet(ABC): @@ -52,7 +51,7 @@ class InstructionSet(ABC): if member.startswith('instruction_'): yield member[12:].replace('_', '.'), getattr(self, member) - def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, int]: + def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, Int32]: """ parses both rd, rs, imm and rd, imm(rs) argument format and returns (rd, imm+rs1) (so a register and address tuple for memory instructions) @@ -70,7 +69,7 @@ class InstructionSet(ABC): rd = ins.get_reg(0) return rd, rs + imm - def parse_rd_rs_rs(self, ins: 'Instruction', signed=True) -> Tuple[str, int, int]: + def parse_rd_rs_rs(self, ins: 'Instruction', signed=True) -> Tuple[str, Int32, Int32]: """ Assumes the command is in rd, rs1, rs2 format Returns the name of rd, and the values in rs1 and rs2 @@ -82,10 +81,10 @@ class InstructionSet(ABC): self.get_reg_content(ins, 2) else: return ins.get_reg(0), \ - to_unsigned(self.get_reg_content(ins, 1)), \ - to_unsigned(self.get_reg_content(ins, 2)) + UInt32(self.get_reg_content(ins, 1)), \ + UInt32(self.get_reg_content(ins, 2)) - def parse_rd_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[str, int, int]: + def parse_rd_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[str, Int32, Int32]: """ Assumes the command is in rd, rs, imm format Returns the name of rd, the value in rs and the immediate imm @@ -93,28 +92,28 @@ class InstructionSet(ABC): ASSERT_LEN(ins.args, 3) if signed: return ins.get_reg(0), \ - self.get_reg_content(ins, 1), \ - ins.get_imm(2) + Int32(self.get_reg_content(ins, 1)), \ + Int32(ins.get_imm(2)) else: return ins.get_reg(0), \ - to_unsigned(self.get_reg_content(ins, 1)), \ - to_unsigned(ins.get_imm(2)) + UInt32(self.get_reg_content(ins, 1)), \ + UInt32(ins.get_imm(2)) - def parse_rs_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[int, int, int]: + def parse_rs_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[Int32, Int32, Int32]: """ Assumes the command is in rs1, rs2, imm format Returns the values in rs1, rs2 and the immediate imm """ if signed: - return self.get_reg_content(ins, 0), \ - self.get_reg_content(ins, 1), \ - ins.get_imm(2) + return Int32(self.get_reg_content(ins, 0)), \ + Int32(self.get_reg_content(ins, 1)), \ + Int32(ins.get_imm(2)) else: - return to_unsigned(self.get_reg_content(ins, 0)), \ - to_unsigned(self.get_reg_content(ins, 1)), \ - to_unsigned(ins.get_imm(2)) + return UInt32(self.get_reg_content(ins, 0)), \ + UInt32(self.get_reg_content(ins, 1)), \ + UInt32(ins.get_imm(2)) - def get_reg_content(self, ins: 'Instruction', ind: int) -> int: + def get_reg_content(self, ins: 'Instruction', ind: int) -> Int32: """ get the register name from ins and then return the register contents """ diff --git a/riscemu/priv/CSR.py b/riscemu/priv/CSR.py index 4a2cc7b..fbd83c6 100644 --- a/riscemu/priv/CSR.py +++ b/riscemu/priv/CSR.py @@ -2,49 +2,49 @@ from typing import Dict, Union, Callable, Optional from collections import defaultdict from .privmodes import PrivModes from .Exceptions import InstructionAccessFault -from ..helpers import to_signed from ..colors import FMT_CSR, FMT_NONE from .CSRConsts import CSR_NAME_TO_ADDR, MSTATUS_LEN_2, MSTATUS_OFFSETS +from ..types import UInt32 class CSR: """ This holds all Control and Status Registers (CSR) """ - regs: Dict[int, int] + regs: Dict[int, UInt32] """ All Control and Status Registers are stored here """ - virtual_regs: Dict[int, Callable[[], int]] + virtual_regs: Dict[int, Callable[[], UInt32]] """ list of virtual CSR registers, with values computed on read """ - listeners: Dict[int, Callable[[int, int], None]] + listeners: Dict[int, Callable[[UInt32, UInt32], None]] - mstatus_cache: Dict[str, int] + mstatus_cache: Dict[str, UInt32] mstatus_cache_dirty = True def __init__(self): - self.regs = defaultdict(lambda: 0) + self.regs = defaultdict(lambda: UInt32(0)) self.listeners = defaultdict(lambda: (lambda x, y: None)) self.virtual_regs = dict() self.mstatus_cache = dict() # TODO: implement write masks (bitmasks which control writeable bits in registers - def set(self, addr: Union[str, int], val: int): + def set(self, addr: Union[str, int], val: Union[int, UInt32]): addr = self._name_to_addr(addr) if addr is None: return - val = to_signed(val) + val = UInt32(val) self.listeners[addr](self.regs[addr], val) if addr == 0x300: self.mstatus_cache_dirty = True self.regs[addr] = val - def get(self, addr: Union[str, int]) -> int: + def get(self, addr: Union[str, int]) -> UInt32: addr = self._name_to_addr(addr) if addr is None: raise RuntimeError(f"Invalid CSR name: {addr}!") @@ -52,7 +52,7 @@ class CSR: return self.virtual_regs[addr]() return self.regs[addr] - def set_listener(self, addr: Union[str, int], listener: Callable[[int, int], None]): + def set_listener(self, addr: Union[str, int], listener: Callable[[UInt32, UInt32], None]): addr = self._name_to_addr(addr) if addr is None: print("unknown csr address name: {}".format(addr)) @@ -60,7 +60,7 @@ class CSR: self.listeners[addr] = listener # mstatus properties - def set_mstatus(self, name: str, val: int): + def set_mstatus(self, name: str, val: UInt32): """ Set mstatus bits using this helper. mstatus is a 32 bit register, holding various machine status flags Setting them by hand is super painful, so this helper allows you to set specific bits. @@ -79,7 +79,7 @@ class CSR: new_val = erased | (val << off) self.set('mstatus', new_val) - def get_mstatus(self, name) -> int: + def get_mstatus(self, name) -> UInt32: if not self.mstatus_cache_dirty and name in self.mstatus_cache: return self.mstatus_cache[name] @@ -94,7 +94,7 @@ class CSR: return val def callback(self, addr: Union[str, int]): - def inner(func: Callable[[int, int], None]): + def inner(func: Callable[[UInt32, UInt32], None]): self.set_listener(addr, func) return func @@ -121,7 +121,7 @@ class CSR: if addr is None: print("unknown csr address name: {}".format(addr)) - def inner(func: Callable[[], int]): + def inner(func: Callable[[], UInt32]): self.virtual_regs[addr] = func return func diff --git a/riscemu/priv/ElfLoader.py b/riscemu/priv/ElfLoader.py index 48fab49..f8538c6 100644 --- a/riscemu/priv/ElfLoader.py +++ b/riscemu/priv/ElfLoader.py @@ -81,12 +81,14 @@ class ElfBinaryFileLoader(ProgramLoader): ) def _parse_symtab(self, symtab: 'SymbolTableSection'): + from elftools.elf.enums import ENUM_ST_VISIBILITY + for sym in symtab.iter_symbols(): if not sym.name: continue self.program.context.labels[sym.name] = sym.entry.st_value # check if it has st_visibility bit set - if sym.entry.st_shndx == 1: # STB_GLOBAL = 1 + if sym.entry.st_info.bind == 'STB_GLOBAL': self.program.global_labels.add(sym.name) print(FMT_PARSE + "LOADED GLOBAL SYMBOL {}: {}".format(sym.name, sym.entry.st_value) + FMT_NONE) diff --git a/riscemu/priv/Exceptions.py b/riscemu/priv/Exceptions.py index 01e863f..53214df 100644 --- a/riscemu/priv/Exceptions.py +++ b/riscemu/priv/Exceptions.py @@ -7,6 +7,7 @@ import typing from .. import RiscemuBaseException from ..colors import FMT_PARSE, FMT_NONE +from ..types import UInt32 if typing.TYPE_CHECKING: from .ElfLoader import ElfInstruction @@ -29,7 +30,7 @@ class CpuTrap(BaseException): The isInterrupt bit in the mstatus register """ - mtval: int + mtval: UInt32 """ contents of the mtval register """ @@ -47,7 +48,7 @@ class CpuTrap(BaseException): def __init__(self, code: int, mtval, type: CpuTrapType, priv: PrivModes = PrivModes.MACHINE): self.interrupt = 0 if type == CpuTrapType.EXCEPTION else 1 self.code = code - self.mtval = mtval + self.mtval = UInt32(mtval) self.priv = priv self.type = type diff --git a/riscemu/priv/ImageLoader.py b/riscemu/priv/ImageLoader.py index 9ef86e6..11f8fe7 100644 --- a/riscemu/priv/ImageLoader.py +++ b/riscemu/priv/ImageLoader.py @@ -26,7 +26,7 @@ class MemoryImageLoader(ProgramLoader): return argv, {} def parse(self) -> Iterable[Program]: - if self.options.get('debug', False): + if 'debug' not in self.options: yield self.parse_no_debug() return @@ -43,11 +43,11 @@ class MemoryImageLoader(ProgramLoader): if program.base is None: program.base = start - in_code_sec = get_section_base_name(sec_name) in INSTRUCTION_SECTION_NAMES + #in_code_sec = get_section_base_name(sec_name) in INSTRUCTION_SECTION_NAMES program.add_section( ElfMemorySection( data[start:start+size], sec_name, program.context, - name, start, MemoryFlags(in_code_sec, in_code_sec) + name, start, MemoryFlags(False, True) ) ) @@ -64,12 +64,12 @@ class MemoryImageLoader(ProgramLoader): p = Program(self.filename) p.add_section(ElfMemorySection( - bytearray(data), 'memory image contents', p.context, p.name, 0, MemoryFlags(False, True) + bytearray(data), '.text', p.context, p.name, 0, MemoryFlags(False, True) )) return p @classmethod def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader': - if os.path.exists(source_path + '.dbg'): + if os.path.isfile(source_path + '.dbg'): return MemoryImageLoader(source_path, dict(**options, debug=source_path + '.dbg')) return MemoryImageLoader(source_path, options) diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index 483300e..c74d766 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -14,8 +14,9 @@ from .ImageLoader import MemoryImageLoader from .PrivMMU import PrivMMU from .PrivRV32I import PrivRV32I from .privmodes import PrivModes +from ..IO.TextIO import TextIO from ..instructions import RV32A, RV32M -from ..types import Program +from ..types import Program, UInt32 if typing.TYPE_CHECKING: from riscemu.instructions.instruction_set import InstructionSet @@ -55,12 +56,16 @@ class PrivCPU(CPU): self.exit_code = 0 self._time_start = 0 - self._time_timecmp = 0 + self._time_timecmp = UInt32(0) self._time_interrupt_enabled = False # performance counters self._perf_counters = list() + # add TextIO + io = TextIO(0xFF0000, 64) + self.mmu.load_section(io, True) + # init csr self._init_csr() @@ -105,11 +110,11 @@ class PrivCPU(CPU): def _init_csr(self): # set up CSR self.csr = CSR() - self.csr.set('mhartid', 0) # core id + self.csr.set('mhartid', UInt32(0)) # core id # TODO: set correct value - self.csr.set('mimpid', 0) # implementation id + self.csr.set('mimpid', UInt32(0)) # implementation id # set mxl to 1 (32 bit) and set bits for i and m isa - self.csr.set('misa', (1 << 30) + (1 << 8) + (1 << 12)) # available ISA + self.csr.set('misa', UInt32((1 << 30) + (1 << 8) + (1 << 12))) # available ISA # CSR write callbacks: @@ -137,11 +142,11 @@ class PrivCPU(CPU): @self.csr.virtual_register('time') def get_time(): - return (time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start) & (2 ** 32 - 1) + return UInt32(time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start) @self.csr.virtual_register('timeh') def get_timeh(): - return (time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start) >> 32 + return UInt32((time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start) >> 32) # add minstret and mcycle counters @@ -156,7 +161,7 @@ class PrivCPU(CPU): self._timer_step() self._check_interrupt() ins = self.mmu.read_ins(self.pc) - if verbose and self.mode == PrivModes.USER: + if verbose and (self.mode == PrivModes.USER or self.conf.verbosity > 4): print(FMT_CPU + " Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins)) self.run_instruction(ins) self.pc += self.INS_XLEN @@ -168,6 +173,7 @@ class PrivCPU(CPU): self.mmu.translate_address(self.pc), self.pc ) + FMT_NONE) + breakpoint() if self.conf.debug_on_exception: raise LaunchDebuggerException() self.pc += self.INS_XLEN @@ -197,16 +203,16 @@ class PrivCPU(CPU): self.csr.set_mstatus('mpie', self.csr.get_mstatus('mie')) self.csr.set_mstatus('mpp', self.mode.value) - self.csr.set_mstatus('mie', 0) + self.csr.set_mstatus('mie', UInt32(0)) self.csr.set('mcause', trap.mcause) self.csr.set('mepc', self.pc - self.INS_XLEN) self.csr.set('mtval', trap.mtval) self.mode = trap.priv mtvec = self.csr.get('mtvec') if mtvec & 0b11 == 0: - self.pc = mtvec + self.pc = mtvec.value if mtvec & 0b11 == 1: - self.pc = (mtvec & 0b11111111111111111111111111111100) + (trap.code * 4) + self.pc = ((mtvec & 0b11111111111111111111111111111100) + (trap.code * 4)).value self.record_perf_profile() if len(self._perf_counters) > 100: self.show_perf() diff --git a/riscemu/priv/PrivRV32I.py b/riscemu/priv/PrivRV32I.py index 81f446a..409f2ef 100644 --- a/riscemu/priv/PrivRV32I.py +++ b/riscemu/priv/PrivRV32I.py @@ -44,7 +44,6 @@ class PrivRV32I(RV32I): old_val = self.cpu.csr.get(csr_addr) self.regs.set(rd, old_val) - def instruction_csrrc(self, ins: 'Instruction'): INS_NOT_IMPLEMENTED(ins) @@ -61,7 +60,6 @@ class PrivRV32I(RV32I): self.cpu.csr.assert_can_write(self.cpu.mode, addr) self.cpu.csr.set(addr, imm) - def instruction_csrrci(self, ins: 'Instruction'): INS_NOT_IMPLEMENTED(ins) @@ -77,10 +75,10 @@ class PrivRV32I(RV32I): self.cpu.mode = PrivModes(mpp) # restore pc mepc = self.cpu.csr.get('mepc') - self.cpu.pc = mepc - self.cpu.INS_XLEN + self.cpu.pc = (mepc - self.cpu.INS_XLEN).value if self.cpu.conf.verbosity > 0: - sec = self.mmu.get_sec_containing(mepc) + sec = self.mmu.get_sec_containing(mepc.value) if sec is not None: print(FMT_CPU + "[CPU] returning to mode {} in {} (0x{:x})".format( PrivModes(mpp).name, @@ -105,32 +103,32 @@ class PrivRV32I(RV32I): def instruction_beq(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 == rs2: - self.pc += dst - 4 + self.pc += dst.value - 4 def instruction_bne(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 != rs2: - self.pc += dst - 4 + self.pc += dst.value - 4 def instruction_blt(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 < rs2: - self.pc += dst - 4 + self.pc += dst.value - 4 def instruction_bge(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins) if rs1 >= rs2: - self.pc += dst - 4 + self.pc += dst.value - 4 def instruction_bltu(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False) if rs1 < rs2: - self.pc += dst - 4 + self.pc += dst.value - 4 def instruction_bgeu(self, ins: 'Instruction'): rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False) if rs1 >= rs2: - self.pc += dst - 4 + self.pc += dst.value - 4 # technically deprecated def instruction_j(self, ins: 'Instruction'): @@ -140,19 +138,24 @@ class PrivRV32I(RV32I): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) addr = ins.get_imm(1) - if reg == 'ra' and self.cpu.mode == PrivModes.USER and self.cpu.conf.verbosity > 1: - print(FMT_CPU + 'Jumping to {} (0x{:x})'.format( + if reg == 'ra' and ( + (self.cpu.mode == PrivModes.USER and self.cpu.conf.verbosity > 1) or + (self.cpu.conf.verbosity > 3) + ): + print(FMT_CPU + 'Jumping from 0x{:x} to {} (0x{:x})'.format( + self.pc, self.mmu.translate_address(self.pc + addr), self.pc + addr ) + FMT_NONE) - self.regs.set(reg, self.pc) + self.regs.dump_reg_a() + self.regs.set(reg, Int32(self.pc)) self.pc += addr - 4 def instruction_jalr(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) rd, rs, imm = self.parse_rd_rs_imm(ins) - self.regs.set(rd, self.pc) - self.pc = rs + imm - 4 + self.regs.set(rd, Int32(self.pc)) + self.pc = rs.value + imm.value - 4 def instruction_sbreak(self, ins: 'Instruction'): raise LaunchDebuggerException() diff --git a/riscemu/priv/__main__.py b/riscemu/priv/__main__.py index bbdd1fb..6e74029 100644 --- a/riscemu/priv/__main__.py +++ b/riscemu/priv/__main__.py @@ -34,4 +34,4 @@ if __name__ == '__main__': for program in program_iter: cpu.load_program(program) - cpu.launch() + cpu.launch(verbose=args.verbose > 4) diff --git a/riscemu/priv/types.py b/riscemu/priv/types.py index 585f580..f42d030 100644 --- a/riscemu/priv/types.py +++ b/riscemu/priv/types.py @@ -43,8 +43,8 @@ class ElfMemorySection(BinaryDataMemorySection): def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int, flags: MemoryFlags): super().__init__(data, name, context, owner, base=base, flags=flags) + self.read_ins = lru_cache(maxsize=self.size // 4)(self.read_ins) - @lru_cache def read_ins(self, offset): if not self.flags.executable: print(FMT_PARSE + "Reading instruction from non-executable memory!" + FMT_NONE) @@ -65,7 +65,7 @@ class ElfMemorySection(BinaryDataMemorySection): class MemoryImageDebugInfos: - VERSION = '1' + VERSION = '1.0.0' """ Schema version """ @@ -99,6 +99,8 @@ class MemoryImageDebugInfos: self.sections = sections self.symbols = symbols self.globals = globals + for name in globals: + globals[name] = set(globals[name]) self.base = base def serialize(self) -> str: @@ -110,7 +112,13 @@ class MemoryImageDebugInfos: return "<>".format(getattr(obj, '__qualname__', '{unknown}')) return json.dumps( - dict(sections=self.sections, symbols=self.symbols, globals=self.globals, base=self.base), + dict( + sections=self.sections, + symbols=self.symbols, + globals=self.globals, + base=self.base, + VERSION=self.VERSION + ), default=serialize ) @@ -124,7 +132,7 @@ class MemoryImageDebugInfos: version: str = json_obj.pop('VERSION') # compare major version - if version != cls.VERSION or version.split('.')[0] != cls.VERSION.split('.')[0]: + if version != cls.VERSION and version.split('.')[0] != cls.VERSION.split('.')[0]: raise RuntimeError( "Unknown MemoryImageDebugInfo version! This emulator expects version {}, debug info version {}".format( cls.VERSION, version diff --git a/riscemu/registers.py b/riscemu/registers.py index aa45915..caa7e36 100644 --- a/riscemu/registers.py +++ b/riscemu/registers.py @@ -8,6 +8,9 @@ from collections import defaultdict from .helpers import * +if typing.TYPE_CHECKING: + from .types import Int32 + class Registers: """ @@ -15,7 +18,8 @@ class Registers: """ def __init__(self): - self.vals = defaultdict(lambda: 0) + from .types import Int32 + self.vals = defaultdict(lambda: Int32(0)) self.last_set = None self.last_read = None @@ -81,7 +85,7 @@ class Registers: return FMT_GRAY + txt + FMT_NONE return txt - def set(self, reg, val, mark_set=True) -> bool: + def set(self, reg, val: 'Int32', mark_set=True) -> bool: """ Set a register content to val :param reg: The register to set @@ -89,6 +93,12 @@ class Registers: :param mark_set: If True, marks this register as "last accessed" (only used internally) :return: If the operation was successful """ + + from .types import Int32 + # remove after refactoring is complete + if not isinstance(val, Int32): + raise RuntimeError("Setting register to non-Int32 value! Please refactor your code!") + if reg == 'zero': return False # if reg not in Registers.all_registers(): @@ -99,10 +109,10 @@ class Registers: if mark_set: self.last_set = reg # check 32 bit signed bounds - self.vals[reg] = bind_twos_complement(val) + self.vals[reg] = val return True - def get(self, reg, mark_read=True): + def get(self, reg, mark_read=True) -> 'Int32': """ Retuns the contents of register reg :param reg: The register name diff --git a/riscemu/types.py b/riscemu/types.py index 0746d3d..4a4692f 100644 --- a/riscemu/types.py +++ b/riscemu/types.py @@ -12,11 +12,12 @@ import re import typing from abc import ABC, abstractmethod from collections import defaultdict +from ctypes import c_uint32, c_int32 from dataclasses import dataclass from typing import Dict, List, Optional, Tuple, Set, Union, Iterator, Callable, Type -from .config import RunConfig from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_RED, FMT_BOLD +from .config import RunConfig from .exceptions import ParseException from .helpers import format_bytes, get_section_base_name from .registers import Registers @@ -35,6 +36,206 @@ T_ParserOpts = Dict[str, any] NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$') +class Int32: + _type = c_int32 + __slots__ = ('_val',) + + def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0): + if isinstance(val, (bytes, bytearray)): + self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True)) + elif isinstance(val, self.__class__._type): + self._val = val + elif isinstance(val, (c_uint32, c_int32, Int32)): + self._val = self.__class__._type(val.value) + elif isinstance(val, int): + self._val = self.__class__._type(val) + else: + raise RuntimeError( + "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val) + ) + + def __add__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + + return self.__class__(self._val.value + other) + + def __sub__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value - other) + + def __mul__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value * other) + + def __truediv__(self, other): + return self // other + + def __floordiv__(self, other): + if isinstance(other, Int32): + other = other.value + return self.__class__(self.value // other) + + def __mod__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value % other) + + def __and__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value & other) + + def __or__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value | other) + + def __xor__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value ^ other) + + def __lshift__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self.value << other) + + def __rshift__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self.value >> other) + + def __eq__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.value == other + + def __neg__(self): + return self.__class__(-self._val.value) + + def __abs__(self): + return self.__class__(abs(self.value)) + + def __bytes__(self): + return self.to_bytes(4) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, self.value) + + def __str__(self): + return str(self.value) + + def __format__(self, format_spec): + return self.value.__format__(format_spec) + + def __hash__(self): + return hash(self.value) + + def __gt__(self, other): + if isinstance(other, Int32): + other = other.value + return self.value > other + + def __lt__(self, other): + if isinstance(other, Int32): + other = other.value + return self.value < other + + def __le__(self, other): + if isinstance(other, Int32): + other = other.value + return self.value <= other + + def __ge__(self, other): + if isinstance(other, Int32): + other = other.value + return self.value >= other + + def __bool__(self): + return bool(self.value) + + def __cmp__(self, other): + if isinstance(other, Int32): + other = other.value + return self.value.__cmp__(other) + + # right handed binary operators + + def __radd__(self, other): + return self + other + + def __rsub__(self, other): + return self.__class__(other) - self + + def __rmul__(self, other): + return self * other + + def __rtruediv__(self, other): + return self.__class__(other) // self + + def __rfloordiv__(self, other): + return self.__class__(other) // self + + def __rmod__(self, other): + return self.__class__(other) % self + + def __rand__(self, other): + return self.__class__(other) & self + + def __ror__(self, other): + return self.__class__(other) | self + + def __rxor__(self, other): + return self.__class__(other) ^ self + + @property + def value(self): + return self._val.value + + def unsigned(self) -> 'UInt32': + return UInt32(self) + + def to_bytes(self, bytes: int = 4) -> bytearray: + return bytearray(self.unsigned_value.to_bytes(bytes, 'little')) + + def signed(self) -> 'Int32': + if self.__class__ == Int32: + return self + return Int32(self) + + @property + def unsigned_value(self): + return c_uint32(self.value).value + + def shift_right_logical(self, ammount: Union['Int32', int]): + if isinstance(ammount, Int32): + ammount = ammount.value + return self.__class__((self.value % 0x100000000) >> ammount) + + def __int__(self): + return self.value + + def __hex__(self): + return hex(self.value) + + +class UInt32(Int32): + _type = c_uint32 + + def unsigned(self) -> 'UInt32': + return self + + @property + def unsigned_value(self): + return self._val.value + + def shift_right_logical(self, ammount: Union['Int32', int]): + return self >> ammount + + @dataclass(frozen=True) class MemoryFlags: read_only: bool @@ -242,16 +443,17 @@ class Program: # print a warning when a section is located before the programs base if self.base is not None: if sec.base < self.base: - print(FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format( - sec, self.name, self.base - ) + FMT_NONE) + print( + FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format( + sec, self.name, self.base + ) + FMT_NONE) self.sections.append(sec) # keep section list ordered self.sections.sort(key=lambda section: section.base) def __repr__(self): - return "{}(name={},globals={},sections={},base={})".format( + return "{}(name={},sections={},base={})".format( self.__class__.__name__, self.name, self.global_labels, [s.name for s in self.sections], self.base ) @@ -273,6 +475,8 @@ class Program: This will do a small sanity check to prevent programs loading twice, or at addresses they don't expect to be loaded. + Then it will finalize all relative symbols defined in it to point to the correct addresses. + :param at_addr: the address where the program will be located """ if self.is_loaded: @@ -449,4 +653,4 @@ class CPU(ABC): @property def programs(self): - return self.mmu.programs \ No newline at end of file + return self.mmu.programs diff --git a/setup.py b/setup.py index 238e88f..edaae98 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding="utf-8") as fh: setuptools.setup( name="riscemu", version=riscemu.__version__, - author="Anton Lydike", + author=riscemu.__author__, author_email="pip@antonlydike.de", description="RISC-V userspace and privileged emulator", long_description=long_description, diff --git a/test/__init__.py b/test/__init__.py index 8030002..1f2c0dd 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,2 +1,3 @@ from .test_tokenizer import * -from .test_helpers import * \ No newline at end of file +from .test_helpers import * +from .test_integers import * \ No newline at end of file diff --git a/test/test_helpers.py b/test/test_helpers.py index 60d93b0..f37517d 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -4,29 +4,6 @@ from riscemu.helpers import * class TestHelpers(TestCase): - def test_int_to_bytes(self): - self.assertEqual(int_to_bytes(-1), bytearray([0xff] * 4), "-1") - self.assertEqual(int_to_bytes(1), bytearray([0, 0, 0, 1]), "1") - self.assertEqual(int_to_bytes(1231132), bytearray(b'\x00\x12\xc9\x1c'), "random number") - self.assertEqual(int_to_bytes(-1231132), bytearray(b'\xff\xed6\xe4'), "random negative number") - - def test_int_from_bytes(self): - self.assertEqual(bytearray([0xff] * 4), int_to_bytes(-1), "-1") - self.assertEqual(bytearray([0, 0, 0, 1]), int_to_bytes(1), "1") - self.assertEqual(bytearray(b'\x00\x12\xc9\x1c'), int_to_bytes(1231132), "random number") - self.assertEqual(bytearray(b'\xff\xed6\xe4'), int_to_bytes(-1231132), "random negative number") - - def test_to_unsigned(self): - self.assertEqual(to_unsigned(-1), 0xFFFFFFFF) - self.assertEqual(to_unsigned(-100), 0xffffff9c) - self.assertEqual(to_unsigned(1), 1) - self.assertEqual(to_unsigned(0xffffffff), 0xffffffff) - self.assertEqual(to_unsigned(0xffed36e4), 0xffed36e4) - - def test_to_signed(self): - self.assertEqual(to_signed(0xFFFFFFFF), -1) - self.assertEqual(to_signed(0xffed36e4), -1231132) - self.assertEqual(to_signed(0x0FFFFFFF), 0x0FFFFFFF) def test_bind_twos_complement(self): minval = -(1 << 31) diff --git a/test/test_integers.py b/test/test_integers.py new file mode 100644 index 0000000..bb11141 --- /dev/null +++ b/test/test_integers.py @@ -0,0 +1,19 @@ +from unittest import TestCase + +from riscemu.types import Int32, UInt32 + + +class TestTokenizer(TestCase): + + def test_logical_right_shift(self): + a = Int32(100) + self.assertEqual(a.shift_right_logical(0), a) + self.assertEqual(a.shift_right_logical(10), 0) + self.assertEqual(a.shift_right_logical(1), 100>>1) + + a = Int32(-100) + self.assertEqual(a.shift_right_logical(0), a) + self.assertEqual(a.shift_right_logical(1), 2147483598) + self.assertEqual(a.shift_right_logical(10), 4194303) + self.assertEqual(a.shift_right_logical(31), 1) + self.assertEqual(a.shift_right_logical(32), 0) diff --git a/test/test_isa.py b/test/test_isa.py index 80a7a13..cc69052 100644 --- a/test/test_isa.py +++ b/test/test_isa.py @@ -1,6 +1,4 @@ from riscemu.colors import FMT_ERROR, FMT_NONE, FMT_BOLD, FMT_GREEN -from riscemu.exceptions import ASSERT_LEN -from riscemu.helpers import int_from_bytes from riscemu.instructions import InstructionSet from riscemu.types import Instruction, CPU from riscemu.decoder import RISCV_REGS From cadccaef00a1bddf7d6ddc42fb68836768568a8d Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 18:44:41 +0200 Subject: [PATCH 16/30] [priv] fixed printing for mret, sret and uret --- riscemu/decoder/formatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscemu/decoder/formatter.py b/riscemu/decoder/formatter.py index 0d7d304..d2e5dbd 100644 --- a/riscemu/decoder/formatter.py +++ b/riscemu/decoder/formatter.py @@ -20,7 +20,7 @@ def format_ins(ins: int, name: str, fmt: str = 'int'): return f"{name} " decoder = INSTRUCTION_ARGS_DECODER[opcode] - if name in ('ecall', 'ebreak'): + if name in ('ecall', 'ebreak', 'mret', 'sret', 'uret'): return name if opcode in (0x8, 0x0): r1, r2, imm = decoder(ins) From 26d0a165f7ed8e7820599929427ddb007c744b02 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 18:45:28 +0200 Subject: [PATCH 17/30] [priv] added --slowdown flag to slow down emulated clock speed --- riscemu/config.py | 1 + riscemu/priv/PrivCPU.py | 2 ++ riscemu/priv/__main__.py | 6 +++--- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/riscemu/config.py b/riscemu/config.py index e5f49a6..a3dc5ae 100644 --- a/riscemu/config.py +++ b/riscemu/config.py @@ -19,6 +19,7 @@ class RunConfig: scall_input: bool = True scall_fs: bool = False verbosity: int = 0 + slowdown: float = 1 CONFIG = RunConfig() diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index c74d766..d2481e2 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -69,6 +69,8 @@ class PrivCPU(CPU): # init csr self._init_csr() + self.TIME_RESOLUTION_NS = int(self.TIME_RESOLUTION_NS * conf.slowdown) + def run(self, verbose=False): if self.pc <= 0: return False diff --git a/riscemu/priv/__main__.py b/riscemu/priv/__main__.py index 6e74029..0f869da 100644 --- a/riscemu/priv/__main__.py +++ b/riscemu/priv/__main__.py @@ -1,8 +1,6 @@ from riscemu import RunConfig from riscemu.types import Program from .PrivCPU import PrivCPU -from .ElfLoader import ElfBinaryFileLoader -from .ImageLoader import MemoryImageLoader import sys @@ -19,9 +17,11 @@ if __name__ == '__main__': parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count', default=0) + parser.add_argument('--slowdown', help="Slow down the emulated CPU clock by a factor", type=float, default=1) + args = parser.parse_args() - cpu = PrivCPU(RunConfig(verbosity=args.verbose, debug_on_exception=args.debug_exceptions)) + cpu = PrivCPU(RunConfig(verbosity=args.verbose, debug_on_exception=args.debug_exceptions, slowdown=args.slowdown)) for source_path in args.source: loader = max((loader for loader in cpu.get_loaders()), key=lambda l: l.can_parse(source_path)) From 254410e9ccc7db12b0f7414093e594912ddf7ca5 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 18:45:59 +0200 Subject: [PATCH 18/30] [priv] fixed error in halt csr --- riscemu/priv/PrivCPU.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index d2481e2..f7df7a9 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -121,22 +121,18 @@ class PrivCPU(CPU): # CSR write callbacks: @self.csr.callback('halt') - def halt(old: int, new: int): + def halt(old: UInt32, new: UInt32): if new != 0: - self.exit = True - self.exit_code = new - - @self.csr.callback('mstatus') - def mstatus(old: int, new: int): - pass + self.halted = True + self.exit_code = new.value @self.csr.callback('mtimecmp') - def mtimecmp(old, new): + def mtimecmp(old: UInt32, new: UInt32): self._time_timecmp = (self.csr.get('mtimecmph') << 32) + new self._time_interrupt_enabled = True @self.csr.callback('mtimecmph') - def mtimecmph(old, new): + def mtimecmph(old: UInt32, new: UInt32): self._time_timecmp = (new << 32) + self.csr.get('mtimecmp') self._time_interrupt_enabled = True From bc26ed3a02ab1eecb02915045ef7243417f91b1a Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 19:06:23 +0200 Subject: [PATCH 19/30] [restructured] moved all simple type definitions into riscemu.types --- riscemu/CPU.py | 3 +- riscemu/assembler.py | 3 +- riscemu/base.py | 82 --- riscemu/debug.py | 2 +- riscemu/interactive.py | 3 +- riscemu/parser.py | 6 +- riscemu/priv/PrivCPU.py | 1 + riscemu/priv/types.py | 7 +- riscemu/types.py | 656 -------------------- riscemu/types/__init__.py | 26 + riscemu/types/binary_data_memory_section.py | 29 + riscemu/types/cpu.py | 107 ++++ riscemu/types/flags.py | 13 + riscemu/types/instruction.py | 31 + riscemu/types/instruction_context.py | 53 ++ riscemu/types/instruction_memory_section.py | 27 + riscemu/types/int32.py | 202 ++++++ riscemu/types/memory_section.py | 88 +++ riscemu/types/program.py | 104 ++++ riscemu/types/program_loader.py | 58 ++ riscemu/types/simple_instruction.py | 26 + setup.py | 2 +- 22 files changed, 775 insertions(+), 754 deletions(-) delete mode 100644 riscemu/base.py delete mode 100644 riscemu/types.py create mode 100644 riscemu/types/__init__.py create mode 100644 riscemu/types/binary_data_memory_section.py create mode 100644 riscemu/types/cpu.py create mode 100644 riscemu/types/flags.py create mode 100644 riscemu/types/instruction.py create mode 100644 riscemu/types/instruction_context.py create mode 100644 riscemu/types/instruction_memory_section.py create mode 100644 riscemu/types/int32.py create mode 100644 riscemu/types/memory_section.py create mode 100644 riscemu/types/program.py create mode 100644 riscemu/types/program_loader.py create mode 100644 riscemu/types/simple_instruction.py diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 2fcedd0..ce405ca 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -12,12 +12,11 @@ from typing import List, Type import riscemu from .config import RunConfig from .MMU import MMU -from .base import BinaryDataMemorySection from .colors import FMT_CPU, FMT_NONE from .debug import launch_debug_session from .exceptions import RiscemuBaseException, LaunchDebuggerException from .syscall import SyscallInterface, get_syscall_symbols -from .types import CPU, ProgramLoader, Int32 +from .types import CPU, ProgramLoader, Int32, BinaryDataMemorySection from .parser import AssemblyFileLoader if typing.TYPE_CHECKING: diff --git a/riscemu/assembler.py b/riscemu/assembler.py index 8e0fca0..121201a 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -2,12 +2,11 @@ from enum import Enum, auto from typing import List from typing import Optional, Tuple, Union -from .base import BinaryDataMemorySection, InstructionMemorySection from .colors import FMT_PARSE, FMT_NONE from .exceptions import ParseException, ASSERT_LEN from .helpers import parse_numeric_argument, align_addr, get_section_base_name from .tokenizer import Token -from .types import Program, T_RelativeAddress, InstructionContext, Instruction, UInt32, Int32 +from .types import Program, T_RelativeAddress, InstructionContext, Instruction, BinaryDataMemorySection, InstructionMemorySection INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini') """ diff --git a/riscemu/base.py b/riscemu/base.py deleted file mode 100644 index 474e4ed..0000000 --- a/riscemu/base.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -This file contains a base implementation of Instruction, and MemorySection. - -This aims to be a simple base, usable for everyone who needs the basic functionality, but doesn't -want to set up their own subtypes of Instruction and MemorySection -""" - -from typing import List, Tuple, Union -from .exceptions import MemoryAccessException -from .helpers import parse_numeric_argument -from .types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \ - T_AbsoluteAddress, Program - - -class SimpleInstruction(Instruction): - def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]], - context: InstructionContext, addr: T_RelativeAddress): - self.context = context - self.name = name - self.args = args - self.addr = addr - - def get_imm(self, num: int) -> int: - resolved_label = self.context.resolve_label(self.args[num], self.addr) - if resolved_label is None: - return parse_numeric_argument(self.args[num]) - return resolved_label - - def get_imm_reg(self, num: int) -> Tuple[int, str]: - return self.get_imm(num + 1), self.get_reg(num) - - def get_reg(self, num: int) -> str: - return self.args[num] - - -class InstructionMemorySection(MemorySection): - def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0): - self.name = name - self.base = base - self.context = context - self.size = len(instructions) * 4 - self.flags = MemoryFlags(True, True) - self.instructions = instructions - self.owner = owner - - def read(self, offset: T_RelativeAddress, size: int) -> bytearray: - raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read') - - def write(self, offset: T_RelativeAddress, size: int, data: bytearray): - raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write') - - def read_ins(self, offset: T_RelativeAddress) -> Instruction: - if offset % 4 != 0: - raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch') - return self.instructions[offset // 4] - - -class BinaryDataMemorySection(MemorySection): - def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0, flags: MemoryFlags = None): - self.name = name - self.base = base - self.context = context - self.size = len(data) - self.flags = flags if flags is not None else MemoryFlags(False, False) - self.data = data - self.owner = owner - - def read(self, offset: T_RelativeAddress, size: int) -> bytearray: - if offset + size > self.size: - raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read') - return self.data[offset:offset + size] - - def write(self, offset: T_RelativeAddress, size: int, data: bytearray): - if offset + size > self.size: - raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write') - if len(data[0:size]) != size: - raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write') - self.data[offset:offset + size] = data[0:size] - - def read_ins(self, offset: T_RelativeAddress) -> Instruction: - raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self), - offset, 4, 'instruction fetch') diff --git a/riscemu/debug.py b/riscemu/debug.py index fd68f7b..5e4a6f4 100644 --- a/riscemu/debug.py +++ b/riscemu/debug.py @@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT """ import os.path -from .base import SimpleInstruction +from .types import SimpleInstruction from .helpers import * if typing.TYPE_CHECKING: diff --git a/riscemu/interactive.py b/riscemu/interactive.py index 71526f3..5b6b088 100644 --- a/riscemu/interactive.py +++ b/riscemu/interactive.py @@ -1,6 +1,5 @@ from riscemu import RunConfig -from riscemu.base import InstructionMemorySection, SimpleInstruction -from riscemu.types import InstructionContext, Program +from riscemu.types import InstructionMemorySection, SimpleInstruction, Program if __name__ == '__main__': from .CPU import UserModeCPU diff --git a/riscemu/parser.py b/riscemu/parser.py index f70cfa3..cd9ece0 100644 --- a/riscemu/parser.py +++ b/riscemu/parser.py @@ -3,17 +3,15 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -import os import re from typing import Dict, Tuple, Iterable, Callable, List -from .helpers import Peekable from .assembler import MemorySectionType, ParseContext, AssemblerDirectives -from .types import Program, T_ParserOpts, ProgramLoader from .colors import FMT_PARSE from .exceptions import ParseException +from .helpers import Peekable from .tokenizer import Token, TokenType, tokenize -from .base import SimpleInstruction +from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction def parse_instruction(token: Token, args: Tuple[str], context: ParseContext): diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index f7df7a9..e6ec7ef 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -191,6 +191,7 @@ class PrivCPU(CPU): trap = self.pending_traps.pop() # use the most recent trap if self.conf.verbosity > 0: print(FMT_CPU + "[CPU] taking trap {}!".format(trap) + FMT_NONE) + self.regs.dump_reg_a() if trap.priv != PrivModes.MACHINE: print(FMT_CPU + "[CPU] Trap not targeting machine mode encountered! - undefined behaviour!" + FMT_NONE) diff --git a/riscemu/priv/types.py b/riscemu/priv/types.py index f42d030..7882bdd 100644 --- a/riscemu/priv/types.py +++ b/riscemu/priv/types.py @@ -4,12 +4,11 @@ from dataclasses import dataclass from functools import lru_cache from typing import Tuple, Dict, Set -from riscemu import MemoryAccessException -from riscemu.priv.Exceptions import InstructionAccessFault, InstructionAddressMisalignedTrap, LoadAccessFault -from riscemu.types import Instruction, InstructionContext, T_RelativeAddress, MemoryFlags, T_AbsoluteAddress -from riscemu.base import BinaryDataMemorySection from riscemu.colors import FMT_NONE, FMT_PARSE from riscemu.decoder import format_ins, RISCV_REGS, decode +from riscemu.priv.Exceptions import InstructionAccessFault, InstructionAddressMisalignedTrap, LoadAccessFault +from riscemu.types import Instruction, InstructionContext, T_RelativeAddress, MemoryFlags, T_AbsoluteAddress, \ + BinaryDataMemorySection @dataclass(frozen=True) diff --git a/riscemu/types.py b/riscemu/types.py deleted file mode 100644 index 4a4692f..0000000 --- a/riscemu/types.py +++ /dev/null @@ -1,656 +0,0 @@ -""" -RiscEmu (c) 2021 Anton Lydike - -SPDX-License-Identifier: MIT - -This file contains abstract base classes and types, bundling only the absolute basic functionality - -See base.py for some basic implementations of these classes -""" -import os -import re -import typing -from abc import ABC, abstractmethod -from collections import defaultdict -from ctypes import c_uint32, c_int32 -from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple, Set, Union, Iterator, Callable, Type - -from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_RED, FMT_BOLD -from .config import RunConfig -from .exceptions import ParseException -from .helpers import format_bytes, get_section_base_name -from .registers import Registers - -if typing.TYPE_CHECKING: - from .MMU import MMU - from .instructions.instruction_set import InstructionSet - -# define some base type aliases so we can keep track of absolute and relative addresses -T_RelativeAddress = int -T_AbsoluteAddress = int - -# parser options are just dictionaries with arbitrary values -T_ParserOpts = Dict[str, any] - -NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$') - - -class Int32: - _type = c_int32 - __slots__ = ('_val',) - - def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0): - if isinstance(val, (bytes, bytearray)): - self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True)) - elif isinstance(val, self.__class__._type): - self._val = val - elif isinstance(val, (c_uint32, c_int32, Int32)): - self._val = self.__class__._type(val.value) - elif isinstance(val, int): - self._val = self.__class__._type(val) - else: - raise RuntimeError( - "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val) - ) - - def __add__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - - return self.__class__(self._val.value + other) - - def __sub__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value - other) - - def __mul__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value * other) - - def __truediv__(self, other): - return self // other - - def __floordiv__(self, other): - if isinstance(other, Int32): - other = other.value - return self.__class__(self.value // other) - - def __mod__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value % other) - - def __and__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value & other) - - def __or__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value | other) - - def __xor__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value ^ other) - - def __lshift__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self.value << other) - - def __rshift__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self.value >> other) - - def __eq__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.value == other - - def __neg__(self): - return self.__class__(-self._val.value) - - def __abs__(self): - return self.__class__(abs(self.value)) - - def __bytes__(self): - return self.to_bytes(4) - - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, self.value) - - def __str__(self): - return str(self.value) - - def __format__(self, format_spec): - return self.value.__format__(format_spec) - - def __hash__(self): - return hash(self.value) - - def __gt__(self, other): - if isinstance(other, Int32): - other = other.value - return self.value > other - - def __lt__(self, other): - if isinstance(other, Int32): - other = other.value - return self.value < other - - def __le__(self, other): - if isinstance(other, Int32): - other = other.value - return self.value <= other - - def __ge__(self, other): - if isinstance(other, Int32): - other = other.value - return self.value >= other - - def __bool__(self): - return bool(self.value) - - def __cmp__(self, other): - if isinstance(other, Int32): - other = other.value - return self.value.__cmp__(other) - - # right handed binary operators - - def __radd__(self, other): - return self + other - - def __rsub__(self, other): - return self.__class__(other) - self - - def __rmul__(self, other): - return self * other - - def __rtruediv__(self, other): - return self.__class__(other) // self - - def __rfloordiv__(self, other): - return self.__class__(other) // self - - def __rmod__(self, other): - return self.__class__(other) % self - - def __rand__(self, other): - return self.__class__(other) & self - - def __ror__(self, other): - return self.__class__(other) | self - - def __rxor__(self, other): - return self.__class__(other) ^ self - - @property - def value(self): - return self._val.value - - def unsigned(self) -> 'UInt32': - return UInt32(self) - - def to_bytes(self, bytes: int = 4) -> bytearray: - return bytearray(self.unsigned_value.to_bytes(bytes, 'little')) - - def signed(self) -> 'Int32': - if self.__class__ == Int32: - return self - return Int32(self) - - @property - def unsigned_value(self): - return c_uint32(self.value).value - - def shift_right_logical(self, ammount: Union['Int32', int]): - if isinstance(ammount, Int32): - ammount = ammount.value - return self.__class__((self.value % 0x100000000) >> ammount) - - def __int__(self): - return self.value - - def __hex__(self): - return hex(self.value) - - -class UInt32(Int32): - _type = c_uint32 - - def unsigned(self) -> 'UInt32': - return self - - @property - def unsigned_value(self): - return self._val.value - - def shift_right_logical(self, ammount: Union['Int32', int]): - return self >> ammount - - -@dataclass(frozen=True) -class MemoryFlags: - read_only: bool - executable: bool - - def __repr__(self): - return "r{}{}".format( - '-' if self.read_only else 'w', - 'x' if self.executable else '-' - ) - - -class InstructionContext: - base_address: T_AbsoluteAddress - """ - The address where the instruction block is placed - """ - - labels: Dict[str, T_RelativeAddress] - """ - This dictionary maps all labels to their relative position of the instruction block - """ - - numbered_labels: Dict[str, List[T_RelativeAddress]] - """ - This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where - the label was placed - """ - - global_symbol_dict: Dict[str, T_AbsoluteAddress] - """ - A reference to the MMU's global symbol dictionary for access to global symbols - """ - - def __init__(self): - self.labels = dict() - self.numbered_labels = defaultdict(list) - self.base_address = 0 - self.global_symbol_dict = dict() - - def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]: - if NUMBER_SYMBOL_PATTERN.match(symbol): - if address_at is None: - raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol)) - - direction = symbol[-1] - values = self.numbered_labels.get(symbol[:-1], []) - if direction == 'b': - return max((addr + self.base_address for addr in values if addr < address_at), default=None) - else: - return min((addr + self.base_address for addr in values if addr > address_at), default=None) - else: - # if it's not a local symbol, try the globals - if symbol not in self.labels: - return self.global_symbol_dict.get(symbol, None) - # otherwise return the local symbol - return self.labels.get(symbol, None) - - -class Instruction(ABC): - name: str - args: tuple - - @abstractmethod - def get_imm(self, num: int) -> int: - """ - parse and get immediate argument - """ - pass - - @abstractmethod - def get_imm_reg(self, num: int) -> Tuple[int, str]: - """ - parse and get an argument imm(reg) - """ - pass - - @abstractmethod - def get_reg(self, num: int) -> str: - """ - parse and get an register argument - """ - pass - - def __repr__(self): - return "{} {}".format(self.name, ", ".join(self.args)) - - -@dataclass -class MemorySection(ABC): - name: str - flags: MemoryFlags - size: int - base: T_AbsoluteAddress - owner: str - context: InstructionContext - - @property - def end(self): - return self.base + self.size - - @abstractmethod - def read(self, offset: T_RelativeAddress, size: int) -> bytearray: - pass - - @abstractmethod - def write(self, offset: T_RelativeAddress, size: int, data: bytearray): - pass - - @abstractmethod - def read_ins(self, offset: T_RelativeAddress) -> Instruction: - pass - - def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex', - bytes_per_row: int = 16, rows: int = 10, group: int = 4): - if self.flags.executable: - bytes_per_row = 4 - highlight = None - if end is None: - end = min(start + (bytes_per_row * (rows // 2)), self.size - 1) - highlight = start - start = max(0, start - (bytes_per_row * (rows // 2))) - - if self.flags.executable: - print(FMT_MEM + "{}, viewing {} instructions:".format( - self, (end - start) // 4 - ) + FMT_NONE) - - for addr in range(start, end, 4): - if addr == highlight: - print(FMT_UNDERLINE + FMT_ORANGE, end='') - print("0x{:04x}: {}{}".format( - self.base + addr, self.read_ins(addr), FMT_NONE - )) - else: - print(FMT_MEM + "{}, viewing {} bytes:".format( - self, (end - start) - ) + FMT_NONE) - - aligned_end = end - (end % bytes_per_row) if end % bytes_per_row != 0 else end - - for addr in range(start, aligned_end, bytes_per_row): - hi_ind = (highlight - addr) // group if highlight is not None else -1 - print("0x{:04x}: {}{}".format( - self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE - )) - - if aligned_end != end: - hi_ind = (highlight - aligned_end) // group if highlight is not None else -1 - print("0x{:04x}: {}{}".format( - self.base + aligned_end, format_bytes( - self.read(aligned_end, end % bytes_per_row), fmt, group, hi_ind - ), FMT_NONE - )) - - def dump_all(self, *args, **kwargs): - self.dump(0, self.size, *args, **kwargs) - - def __repr__(self): - return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format( - self.__class__.__name__, - self.name, - self.base, - self.size, - self.flags, - self.owner - ) - - -class Program: - """ - This represents a collection of sections which together form an executable program - - When you want to create a program which can be located anywhere in memory, set base to None, - this signals the other components, that this is relocatable. Set the base of each section to - the offset in the program, and everything will be taken care of for you. - - """ - name: str - context: InstructionContext - global_labels: Set[str] - relative_labels: Set[str] - sections: List[MemorySection] - base: Optional[T_AbsoluteAddress] - is_loaded: bool - - @property - def size(self): - if len(self.sections) == 0: - return 0 - if self.base is None: - return self.sections[-1].base + self.sections[-1].size - return (self.sections[-1].base - self.base) + self.sections[-1].size - - def __init__(self, name: str, base: Optional[int] = None): - self.name = name - self.context = InstructionContext() - self.sections = [] - self.global_labels = set() - self.relative_labels = set() - self.base = base - self.is_loaded = False - - def add_section(self, sec: MemorySection): - # print a warning when a section is located before the programs base - if self.base is not None: - if sec.base < self.base: - print( - FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format( - sec, self.name, self.base - ) + FMT_NONE) - - self.sections.append(sec) - # keep section list ordered - self.sections.sort(key=lambda section: section.base) - - def __repr__(self): - return "{}(name={},sections={},base={})".format( - self.__class__.__name__, self.name, self.global_labels, - [s.name for s in self.sections], self.base - ) - - @property - def entrypoint(self): - if '_start' in self.context.labels: - return self.context.labels.get('_start') - if 'main' in self.context.labels: - return self.context.labels.get('main') - for sec in self.sections: - if get_section_base_name(sec.name) == '.text' and sec.flags.executable: - return sec.base - - def loaded_trigger(self, at_addr: T_AbsoluteAddress): - """ - This trigger is called when the binary is loaded and its final address in memory is determined - - This will do a small sanity check to prevent programs loading twice, or at addresses they don't - expect to be loaded. - - Then it will finalize all relative symbols defined in it to point to the correct addresses. - - :param at_addr: the address where the program will be located - """ - if self.is_loaded: - if at_addr != self.base: - raise RuntimeError("Program loaded twice at different addresses! This will probably break things!") - return - - if self.base is not None and self.base != at_addr: - print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, ' - 'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE) - - # check if we are relocating - if self.base != at_addr: - offset = at_addr if self.base is None else at_addr - self.base - - # move all sections by the offset - for sec in self.sections: - sec.base += offset - - # move all relative symbols by the offset - for name in self.relative_labels: - self.context.labels[name] += offset - - self.base = at_addr - self.context.base_address = at_addr - - -class ProgramLoader(ABC): - """ - A program loader is always specific to a given source file. It is a place to store all state - concerning the parsing and loading of that specific source file, including options. - """ - - def __init__(self, source_path: str, options: T_ParserOpts): - self.source_path = source_path - self.options = options - self.filename = os.path.split(self.source_path)[-1] - - @classmethod - @abstractmethod - def can_parse(cls, source_path: str) -> float: - """ - Return confidence that the file located at source_path - should be parsed and loaded by this loader - :param source_path: the path of the source file - :return: the confidence that this file belongs to this parser - """ - pass - - @classmethod - @abstractmethod - def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]: - """ - parse command line args into an options dictionary - - :param argv: the command line args list - :return: all remaining command line args and the parser options object - """ - pass - - @classmethod - def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader': - """ - Instantiate a loader for the given source file with the required arguments - - :param source_path: the path to the source file - :param options: the parsed options (guaranteed to come from this classes get_options method. - :return: An instance of a ProgramLoader for the spcified source - """ - return cls(source_path, options) - - @abstractmethod - def parse(self) -> Union[Program, Iterator[Program]]: - """ - - :return: - """ - pass - - -class CPU(ABC): - # static cpu configuration - INS_XLEN: int = 4 - - # housekeeping variables - regs: Registers - mmu: 'MMU' - pc: T_AbsoluteAddress - cycle: int - halted: bool - - # debugging context - debugger_active: bool - - # instruction information - instructions: Dict[str, Callable[[Instruction], None]] - instruction_sets: Set['InstructionSet'] - - # configuration - conf: RunConfig - - def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig): - self.mmu = mmu - self.regs = Registers() - self.conf = conf - - self.instruction_sets = set() - self.instructions = dict() - - for set_class in instruction_sets: - ins_set = set_class(self) - self.instructions.update(ins_set.load()) - self.instruction_sets.add(ins_set) - - self.halted = False - self.cycle = 0 - self.pc = 0 - self.debugger_active = False - - def run_instruction(self, ins: Instruction): - """ - Execute a single instruction - - :param ins: The instruction to execute - """ - if ins.name in self.instructions: - self.instructions[ins.name](ins) - else: - # this should never be reached, as unknown instructions are imparseable - raise RuntimeError("Unknown instruction: {}".format(ins)) - - def load_program(self, program: Program): - self.mmu.load_program(program) - - def __repr__(self): - """ - Returns a representation of the CPU and some of its state. - """ - return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format( - self.__class__.__name__, - self.pc, - self.cycle, - self.halted, - " ".join(s.name for s in self.instruction_sets) - ) - - @abstractmethod - def step(self, verbose=False): - pass - - @abstractmethod - def run(self, verbose=False): - pass - - def launch(self, program: Program, verbose: bool = False): - if program not in self.mmu.programs: - print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE) - return - - self.pc = program.entrypoint - self.run(verbose) - - @classmethod - @abstractmethod - def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]: - pass - - def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]: - return max(self.get_loaders(), key=lambda ld: ld.can_parse(file_name)) - - @property - def sections(self): - return self.mmu.sections - - @property - def programs(self): - return self.mmu.programs diff --git a/riscemu/types/__init__.py b/riscemu/types/__init__.py new file mode 100644 index 0000000..7b46fc1 --- /dev/null +++ b/riscemu/types/__init__.py @@ -0,0 +1,26 @@ +from typing import Dict +import re + +# define some base type aliases so we can keep track of absolute and relative addresses +T_RelativeAddress = int +T_AbsoluteAddress = int + +# parser options are just dictionaries with arbitrary values +T_ParserOpts = Dict[str, any] + +NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$') + +from .flags import MemoryFlags +from .int32 import UInt32, Int32 +from .instruction import Instruction +from .instruction_context import InstructionContext +from .memory_section import MemorySection +from .program import Program +from .program_loader import ProgramLoader +from .cpu import CPU +from .simple_instruction import SimpleInstruction +from .instruction_memory_section import InstructionMemorySection +from .binary_data_memory_section import BinaryDataMemorySection + + + diff --git a/riscemu/types/binary_data_memory_section.py b/riscemu/types/binary_data_memory_section.py new file mode 100644 index 0000000..7f85ff1 --- /dev/null +++ b/riscemu/types/binary_data_memory_section.py @@ -0,0 +1,29 @@ +from . import MemorySection, InstructionContext, MemoryFlags, T_RelativeAddress, Instruction +from ..exceptions import MemoryAccessException + + +class BinaryDataMemorySection(MemorySection): + def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0, flags: MemoryFlags = None): + self.name = name + self.base = base + self.context = context + self.size = len(data) + self.flags = flags if flags is not None else MemoryFlags(False, False) + self.data = data + self.owner = owner + + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + if offset + size > self.size: + raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read') + return self.data[offset:offset + size] + + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + if offset + size > self.size: + raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write') + if len(data[0:size]) != size: + raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write') + self.data[offset:offset + size] = data[0:size] + + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self), + offset, 4, 'instruction fetch') diff --git a/riscemu/types/cpu.py b/riscemu/types/cpu.py new file mode 100644 index 0000000..23de29c --- /dev/null +++ b/riscemu/types/cpu.py @@ -0,0 +1,107 @@ +import typing +from abc import ABC, abstractmethod +from typing import List, Type, Callable, Set, Dict + +from ..registers import Registers +from ..config import RunConfig +from ..colors import FMT_RED, FMT_NONE +from . import T_AbsoluteAddress, Instruction, Program, ProgramLoader + + +class CPU(ABC): + # static cpu configuration + INS_XLEN: int = 4 + + # housekeeping variables + regs: Registers + mmu: 'MMU' + pc: T_AbsoluteAddress + cycle: int + halted: bool + + # debugging context + debugger_active: bool + + # instruction information + instructions: Dict[str, Callable[[Instruction], None]] + instruction_sets: Set['InstructionSet'] + + # configuration + conf: RunConfig + + def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig): + self.mmu = mmu + self.regs = Registers() + self.conf = conf + + self.instruction_sets = set() + self.instructions = dict() + + for set_class in instruction_sets: + ins_set = set_class(self) + self.instructions.update(ins_set.load()) + self.instruction_sets.add(ins_set) + + self.halted = False + self.cycle = 0 + self.pc = 0 + self.debugger_active = False + + def run_instruction(self, ins: Instruction): + """ + Execute a single instruction + + :param ins: The instruction to execute + """ + if ins.name in self.instructions: + self.instructions[ins.name](ins) + else: + # this should never be reached, as unknown instructions are imparseable + raise RuntimeError("Unknown instruction: {}".format(ins)) + + def load_program(self, program: Program): + self.mmu.load_program(program) + + def __repr__(self): + """ + Returns a representation of the CPU and some of its state. + """ + return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format( + self.__class__.__name__, + self.pc, + self.cycle, + self.halted, + " ".join(s.name for s in self.instruction_sets) + ) + + @abstractmethod + def step(self, verbose=False): + pass + + @abstractmethod + def run(self, verbose=False): + pass + + def launch(self, program: Program, verbose: bool = False): + if program not in self.mmu.programs: + print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE) + return + + self.pc = program.entrypoint + self.run(verbose) + + @classmethod + @abstractmethod + def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]: + pass + + def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]: + return max(self.get_loaders(), key=lambda ld: ld.can_parse(file_name)) + + @property + def sections(self): + return self.mmu.sections + + @property + def programs(self): + return self.mmu.programs diff --git a/riscemu/types/flags.py b/riscemu/types/flags.py new file mode 100644 index 0000000..7c1a7e7 --- /dev/null +++ b/riscemu/types/flags.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass + + +@dataclass(frozen=True) +class MemoryFlags: + read_only: bool + executable: bool + + def __repr__(self): + return "r{}{}".format( + '-' if self.read_only else 'w', + 'x' if self.executable else '-' + ) diff --git a/riscemu/types/instruction.py b/riscemu/types/instruction.py new file mode 100644 index 0000000..516d254 --- /dev/null +++ b/riscemu/types/instruction.py @@ -0,0 +1,31 @@ +from abc import ABC, abstractmethod +from typing import Tuple + + +class Instruction(ABC): + name: str + args: tuple + + @abstractmethod + def get_imm(self, num: int) -> int: + """ + parse and get immediate argument + """ + pass + + @abstractmethod + def get_imm_reg(self, num: int) -> Tuple[int, str]: + """ + parse and get an argument imm(reg) + """ + pass + + @abstractmethod + def get_reg(self, num: int) -> str: + """ + parse and get an register argument + """ + pass + + def __repr__(self): + return "{} {}".format(self.name, ", ".join(self.args)) diff --git a/riscemu/types/instruction_context.py b/riscemu/types/instruction_context.py new file mode 100644 index 0000000..6d8678c --- /dev/null +++ b/riscemu/types/instruction_context.py @@ -0,0 +1,53 @@ +from collections import defaultdict +from typing import Dict, List, Optional + +from ..exceptions import ParseException +from ..types import T_AbsoluteAddress, T_RelativeAddress, NUMBER_SYMBOL_PATTERN + + +class InstructionContext: + base_address: T_AbsoluteAddress + """ + The address where the instruction block is placed + """ + + labels: Dict[str, T_RelativeAddress] + """ + This dictionary maps all labels to their relative position of the instruction block + """ + + numbered_labels: Dict[str, List[T_RelativeAddress]] + """ + This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where + the label was placed + """ + + global_symbol_dict: Dict[str, T_AbsoluteAddress] + """ + A reference to the MMU's global symbol dictionary for access to global symbols + """ + + def __init__(self): + self.labels = dict() + self.numbered_labels = defaultdict(list) + self.base_address = 0 + self.global_symbol_dict = dict() + + def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]: + if NUMBER_SYMBOL_PATTERN.match(symbol): + if address_at is None: + raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol)) + + direction = symbol[-1] + values = self.numbered_labels.get(symbol[:-1], []) + if direction == 'b': + return max((addr + self.base_address for addr in values if addr < address_at), default=None) + else: + return min((addr + self.base_address for addr in values if addr > address_at), default=None) + else: + # if it's not a local symbol, try the globals + if symbol not in self.labels: + return self.global_symbol_dict.get(symbol, None) + # otherwise return the local symbol + return self.labels.get(symbol, None) + diff --git a/riscemu/types/instruction_memory_section.py b/riscemu/types/instruction_memory_section.py new file mode 100644 index 0000000..76553b0 --- /dev/null +++ b/riscemu/types/instruction_memory_section.py @@ -0,0 +1,27 @@ +from typing import List + +from . import MemorySection, Instruction, InstructionContext, MemoryFlags, T_RelativeAddress +from .. import MemoryAccessException + + +class InstructionMemorySection(MemorySection): + def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0): + self.name = name + self.base = base + self.context = context + self.size = len(instructions) * 4 + self.flags = MemoryFlags(True, True) + self.instructions = instructions + self.owner = owner + + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read') + + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write') + + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + if offset % 4 != 0: + raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch') + return self.instructions[offset // 4] + diff --git a/riscemu/types/int32.py b/riscemu/types/int32.py new file mode 100644 index 0000000..2fa08b7 --- /dev/null +++ b/riscemu/types/int32.py @@ -0,0 +1,202 @@ +from typing import Union +from ctypes import c_int32, c_uint32 + + +class Int32: + _type = c_int32 + __slots__ = ('_val',) + + def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0): + if isinstance(val, (bytes, bytearray)): + self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True)) + elif isinstance(val, self.__class__._type): + self._val = val + elif isinstance(val, (c_uint32, c_int32, Int32)): + self._val = self.__class__._type(val.value) + elif isinstance(val, int): + self._val = self.__class__._type(val) + else: + raise RuntimeError( + "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val) + ) + + def __add__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + + return self.__class__(self._val.value + other) + + def __sub__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value - other) + + def __mul__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value * other) + + def __truediv__(self, other): + return self // other + + def __floordiv__(self, other): + if isinstance(other, Int32): + other = other.value + return self.__class__(self.value // other) + + def __mod__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value % other) + + def __and__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value & other) + + def __or__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value | other) + + def __xor__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self._val.value ^ other) + + def __lshift__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self.value << other) + + def __rshift__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.__class__(self.value >> other) + + def __eq__(self, other: Union['Int32', int]): + if isinstance(other, Int32): + other = other.value + return self.value == other + + def __neg__(self): + return self.__class__(-self._val.value) + + def __abs__(self): + return self.__class__(abs(self.value)) + + def __bytes__(self): + return self.to_bytes(4) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, self.value) + + def __str__(self): + return str(self.value) + + def __format__(self, format_spec): + return self.value.__format__(format_spec) + + def __hash__(self): + return hash(self.value) + + def __gt__(self, other): + if isinstance(other, Int32): + other = other.value + return self.value > other + + def __lt__(self, other): + if isinstance(other, Int32): + other = other.value + return self.value < other + + def __le__(self, other): + if isinstance(other, Int32): + other = other.value + return self.value <= other + + def __ge__(self, other): + if isinstance(other, Int32): + other = other.value + return self.value >= other + + def __bool__(self): + return bool(self.value) + + def __cmp__(self, other): + if isinstance(other, Int32): + other = other.value + return self.value.__cmp__(other) + + # right handed binary operators + + def __radd__(self, other): + return self + other + + def __rsub__(self, other): + return self.__class__(other) - self + + def __rmul__(self, other): + return self * other + + def __rtruediv__(self, other): + return self.__class__(other) // self + + def __rfloordiv__(self, other): + return self.__class__(other) // self + + def __rmod__(self, other): + return self.__class__(other) % self + + def __rand__(self, other): + return self.__class__(other) & self + + def __ror__(self, other): + return self.__class__(other) | self + + def __rxor__(self, other): + return self.__class__(other) ^ self + + @property + def value(self): + return self._val.value + + def unsigned(self) -> 'UInt32': + return UInt32(self) + + def to_bytes(self, bytes: int = 4) -> bytearray: + return bytearray(self.unsigned_value.to_bytes(bytes, 'little')) + + def signed(self) -> 'Int32': + if self.__class__ == Int32: + return self + return Int32(self) + + @property + def unsigned_value(self): + return c_uint32(self.value).value + + def shift_right_logical(self, ammount: Union['Int32', int]): + if isinstance(ammount, Int32): + ammount = ammount.value + return self.__class__((self.value % 0x100000000) >> ammount) + + def __int__(self): + return self.value + + def __hex__(self): + return hex(self.value) + + +class UInt32(Int32): + _type = c_uint32 + + def unsigned(self) -> 'UInt32': + return self + + @property + def unsigned_value(self): + return self._val.value + + def shift_right_logical(self, ammount: Union['Int32', int]): + return self >> ammount diff --git a/riscemu/types/memory_section.py b/riscemu/types/memory_section.py new file mode 100644 index 0000000..a1512e4 --- /dev/null +++ b/riscemu/types/memory_section.py @@ -0,0 +1,88 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Optional + +from ..colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE +from ..helpers import format_bytes +from . import MemoryFlags, T_AbsoluteAddress, InstructionContext, T_RelativeAddress, Instruction + + +@dataclass +class MemorySection(ABC): + name: str + flags: MemoryFlags + size: int + base: T_AbsoluteAddress + owner: str + context: InstructionContext + + @property + def end(self): + return self.base + self.size + + @abstractmethod + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + pass + + @abstractmethod + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + pass + + @abstractmethod + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + pass + + def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex', + bytes_per_row: int = 16, rows: int = 10, group: int = 4): + if self.flags.executable: + bytes_per_row = 4 + highlight = None + if end is None: + end = min(start + (bytes_per_row * (rows // 2)), self.size - 1) + highlight = start + start = max(0, start - (bytes_per_row * (rows // 2))) + + if self.flags.executable: + print(FMT_MEM + "{}, viewing {} instructions:".format( + self, (end - start) // 4 + ) + FMT_NONE) + + for addr in range(start, end, 4): + if addr == highlight: + print(FMT_UNDERLINE + FMT_ORANGE, end='') + print("0x{:04x}: {}{}".format( + self.base + addr, self.read_ins(addr), FMT_NONE + )) + else: + print(FMT_MEM + "{}, viewing {} bytes:".format( + self, (end - start) + ) + FMT_NONE) + + aligned_end = end - (end % bytes_per_row) if end % bytes_per_row != 0 else end + + for addr in range(start, aligned_end, bytes_per_row): + hi_ind = (highlight - addr) // group if highlight is not None else -1 + print("0x{:04x}: {}{}".format( + self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE + )) + + if aligned_end != end: + hi_ind = (highlight - aligned_end) // group if highlight is not None else -1 + print("0x{:04x}: {}{}".format( + self.base + aligned_end, format_bytes( + self.read(aligned_end, end % bytes_per_row), fmt, group, hi_ind + ), FMT_NONE + )) + + def dump_all(self, *args, **kwargs): + self.dump(0, self.size, *args, **kwargs) + + def __repr__(self): + return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format( + self.__class__.__name__, + self.name, + self.base, + self.size, + self.flags, + self.owner + ) diff --git a/riscemu/types/program.py b/riscemu/types/program.py new file mode 100644 index 0000000..24533f7 --- /dev/null +++ b/riscemu/types/program.py @@ -0,0 +1,104 @@ +from typing import List, Optional, Set + +from ..colors import FMT_RED, FMT_BOLD, FMT_NONE, FMT_MEM +from ..helpers import get_section_base_name +from . import InstructionContext, T_AbsoluteAddress, MemorySection + + +class Program: + """ + This represents a collection of sections which together form an executable program + + When you want to create a program which can be located anywhere in memory, set base to None, + this signals the other components, that this is relocatable. Set the base of each section to + the offset in the program, and everything will be taken care of for you. + + """ + name: str + context: InstructionContext + global_labels: Set[str] + relative_labels: Set[str] + sections: List[MemorySection] + base: Optional[T_AbsoluteAddress] + is_loaded: bool + + @property + def size(self): + if len(self.sections) == 0: + return 0 + if self.base is None: + return self.sections[-1].base + self.sections[-1].size + return (self.sections[-1].base - self.base) + self.sections[-1].size + + def __init__(self, name: str, base: Optional[int] = None): + self.name = name + self.context = InstructionContext() + self.sections = [] + self.global_labels = set() + self.relative_labels = set() + self.base = base + self.is_loaded = False + + def add_section(self, sec: MemorySection): + # print a warning when a section is located before the programs base + if self.base is not None: + if sec.base < self.base: + print( + FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format( + sec, self.name, self.base + ) + FMT_NONE) + + self.sections.append(sec) + # keep section list ordered + self.sections.sort(key=lambda section: section.base) + + def __repr__(self): + return "{}(name={},sections={},base={})".format( + self.__class__.__name__, self.name, self.global_labels, + [s.name for s in self.sections], self.base + ) + + @property + def entrypoint(self): + if '_start' in self.context.labels: + return self.context.labels.get('_start') + if 'main' in self.context.labels: + return self.context.labels.get('main') + for sec in self.sections: + if get_section_base_name(sec.name) == '.text' and sec.flags.executable: + return sec.base + + def loaded_trigger(self, at_addr: T_AbsoluteAddress): + """ + This trigger is called when the binary is loaded and its final address in memory is determined + + This will do a small sanity check to prevent programs loading twice, or at addresses they don't + expect to be loaded. + + Then it will finalize all relative symbols defined in it to point to the correct addresses. + + :param at_addr: the address where the program will be located + """ + if self.is_loaded: + if at_addr != self.base: + raise RuntimeError("Program loaded twice at different addresses! This will probably break things!") + return + + if self.base is not None and self.base != at_addr: + print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, ' + 'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE) + + # check if we are relocating + if self.base != at_addr: + offset = at_addr if self.base is None else at_addr - self.base + + # move all sections by the offset + for sec in self.sections: + sec.base += offset + + # move all relative symbols by the offset + for name in self.relative_labels: + self.context.labels[name] += offset + + self.base = at_addr + self.context.base_address = at_addr diff --git a/riscemu/types/program_loader.py b/riscemu/types/program_loader.py new file mode 100644 index 0000000..e951749 --- /dev/null +++ b/riscemu/types/program_loader.py @@ -0,0 +1,58 @@ +import os +from abc import abstractmethod, ABC +from typing import Union, Iterator, List + +from . import T_ParserOpts, Program + + +class ProgramLoader(ABC): + """ + A program loader is always specific to a given source file. It is a place to store all state + concerning the parsing and loading of that specific source file, including options. + """ + + def __init__(self, source_path: str, options: T_ParserOpts): + self.source_path = source_path + self.options = options + self.filename = os.path.split(self.source_path)[-1] + + @classmethod + @abstractmethod + def can_parse(cls, source_path: str) -> float: + """ + Return confidence that the file located at source_path + should be parsed and loaded by this loader + :param source_path: the path of the source file + :return: the confidence that this file belongs to this parser + """ + pass + + @classmethod + @abstractmethod + def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]: + """ + parse command line args into an options dictionary + + :param argv: the command line args list + :return: all remaining command line args and the parser options object + """ + pass + + @classmethod + def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader': + """ + Instantiate a loader for the given source file with the required arguments + + :param source_path: the path to the source file + :param options: the parsed options (guaranteed to come from this classes get_options method. + :return: An instance of a ProgramLoader for the spcified source + """ + return cls(source_path, options) + + @abstractmethod + def parse(self) -> Union[Program, Iterator[Program]]: + """ + + :return: + """ + pass diff --git a/riscemu/types/simple_instruction.py b/riscemu/types/simple_instruction.py new file mode 100644 index 0000000..59d7b6c --- /dev/null +++ b/riscemu/types/simple_instruction.py @@ -0,0 +1,26 @@ +from typing import Union, Tuple + +from . import Instruction, T_RelativeAddress, InstructionContext +from ..helpers import parse_numeric_argument + + +class SimpleInstruction(Instruction): + def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]], + context: InstructionContext, addr: T_RelativeAddress): + self.context = context + self.name = name + self.args = args + self.addr = addr + + def get_imm(self, num: int) -> int: + resolved_label = self.context.resolve_label(self.args[num], self.addr) + if resolved_label is None: + return parse_numeric_argument(self.args[num]) + return resolved_label + + def get_imm_reg(self, num: int) -> Tuple[int, str]: + return self.get_imm(num + 1), self.get_reg(num) + + def get_reg(self, num: int) -> str: + return self.args[num] + diff --git a/setup.py b/setup.py index edaae98..c702df6 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setuptools.setup( "Operating System :: OS Independent", ], package_dir={"": "."}, - packages=["riscemu", "riscemu.decoder", "riscemu.instructions", "riscemu.IO", "riscemu.priv"], + packages=["riscemu", "riscemu.decoder", "riscemu.instructions", "riscemu.IO", "riscemu.priv", "riscemu.types"], python_requires=">=3.6", install_requires=[ "pyelftools~=0.27" From cc3df91fd103106fee04790b665b8b0534dc42ae Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 19:11:10 +0200 Subject: [PATCH 20/30] [restructured] moved more types and exceptions to riscemu.types --- riscemu/CPU.py | 2 +- riscemu/MMU.py | 2 +- riscemu/__init__.py | 2 +- riscemu/assembler.py | 2 +- riscemu/helpers.py | 8 ++++---- riscemu/instructions/RV32A.py | 2 +- riscemu/instructions/RV32I.py | 3 +-- riscemu/instructions/RV32M.py | 2 +- riscemu/instructions/instruction_set.py | 2 +- riscemu/parser.py | 2 +- riscemu/priv/PrivRV32I.py | 2 +- riscemu/tokenizer.py | 6 +++--- riscemu/types/__init__.py | 7 +++++-- riscemu/types/binary_data_memory_section.py | 2 +- riscemu/{ => types}/exceptions.py | 4 ++-- riscemu/types/instruction_context.py | 2 +- 16 files changed, 26 insertions(+), 24 deletions(-) rename riscemu/{ => types}/exceptions.py (98%) diff --git a/riscemu/CPU.py b/riscemu/CPU.py index ce405ca..0fc0197 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -14,7 +14,7 @@ from .config import RunConfig from .MMU import MMU from .colors import FMT_CPU, FMT_NONE from .debug import launch_debug_session -from .exceptions import RiscemuBaseException, LaunchDebuggerException +from .types.exceptions import RiscemuBaseException, LaunchDebuggerException from .syscall import SyscallInterface, get_syscall_symbols from .types import CPU, ProgramLoader, Int32, BinaryDataMemorySection from .parser import AssemblyFileLoader diff --git a/riscemu/MMU.py b/riscemu/MMU.py index fdbf822..212c1f3 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -7,10 +7,10 @@ SPDX-License-Identifier: MIT from typing import Dict, List, Optional, Union from .colors import * -from .exceptions import InvalidAllocationException, MemoryAccessException from .helpers import align_addr from .types import Instruction, MemorySection, MemoryFlags, T_AbsoluteAddress, \ Program, InstructionContext, Int32 +from .types.exceptions import InvalidAllocationException, MemoryAccessException class MMU: diff --git a/riscemu/__init__.py b/riscemu/__init__.py index 6c39581..dc78b53 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -8,7 +8,7 @@ This package aims at providing an all-round usable RISC-V emulator and debugger It contains everything needed to run assembly files, so you don't need any custom compilers or toolchains """ -from .exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ +from .types.exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException from .instructions import * diff --git a/riscemu/assembler.py b/riscemu/assembler.py index 121201a..793484e 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -3,7 +3,7 @@ from typing import List from typing import Optional, Tuple, Union from .colors import FMT_PARSE, FMT_NONE -from .exceptions import ParseException, ASSERT_LEN +from riscemu.types.exceptions import ParseException, ASSERT_LEN from .helpers import parse_numeric_argument, align_addr, get_section_base_name from .tokenizer import Token from .types import Program, T_RelativeAddress, InstructionContext, Instruction, BinaryDataMemorySection, InstructionMemorySection diff --git a/riscemu/helpers.py b/riscemu/helpers.py index 82774d1..843a85e 100644 --- a/riscemu/helpers.py +++ b/riscemu/helpers.py @@ -7,8 +7,8 @@ SPDX-License-Identifier: MIT from math import log10, ceil from typing import Iterable, Iterator, TypeVar, Generic, List, Optional -from .exceptions import * -import types +from .types.exceptions import * +from .types import Int32, UInt32 def align_addr(addr: int, to_bytes: int = 8) -> int: @@ -55,10 +55,10 @@ def format_bytes(byte_arr: bytearray, fmt: str, group: int = 1, highlight: int = return highlight_in_list(['0x{}'.format(ch.hex()) for ch in chunks], highlight) if fmt == 'int': spc = str(ceil(log10(2 ** (group * 8 - 1))) + 1) - return highlight_in_list([('{:0' + spc + 'd}').format(types.Int32(ch)) for ch in chunks], highlight) + return highlight_in_list([('{:0' + spc + 'd}').format(Int32(ch)) for ch in chunks], highlight) if fmt == 'uint': spc = str(ceil(log10(2 ** (group * 8)))) - return highlight_in_list([('{:0' + spc + 'd}').format(types.UInt32(ch)) for ch in chunks], + return highlight_in_list([('{:0' + spc + 'd}').format(UInt32(ch)) for ch in chunks], highlight) if fmt == 'ascii': return "".join(repr(chr(b))[1:-1] for b in byte_arr) diff --git a/riscemu/instructions/RV32A.py b/riscemu/instructions/RV32A.py index c7f7c15..af92130 100644 --- a/riscemu/instructions/RV32A.py +++ b/riscemu/instructions/RV32A.py @@ -1,5 +1,5 @@ from .instruction_set import InstructionSet, Instruction -from ..exceptions import INS_NOT_IMPLEMENTED +from riscemu.types.exceptions import INS_NOT_IMPLEMENTED from ..types import Int32, UInt32 diff --git a/riscemu/instructions/RV32I.py b/riscemu/instructions/RV32I.py index 26d0bd9..50d2076 100644 --- a/riscemu/instructions/RV32I.py +++ b/riscemu/instructions/RV32I.py @@ -8,8 +8,7 @@ from .instruction_set import * from ..CPU import UserModeCPU from ..colors import FMT_DEBUG, FMT_NONE -from ..debug import launch_debug_session -from ..exceptions import LaunchDebuggerException +from riscemu.types.exceptions import LaunchDebuggerException from ..syscall import Syscall from ..types import Instruction, Int32, UInt32 diff --git a/riscemu/instructions/RV32M.py b/riscemu/instructions/RV32M.py index d8ae08b..dda5ee4 100644 --- a/riscemu/instructions/RV32M.py +++ b/riscemu/instructions/RV32M.py @@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT """ from .instruction_set import * -from ..exceptions import INS_NOT_IMPLEMENTED +from riscemu.types.exceptions import INS_NOT_IMPLEMENTED class RV32M(InstructionSet): diff --git a/riscemu/instructions/instruction_set.py b/riscemu/instructions/instruction_set.py index e0d3f06..ef91d18 100644 --- a/riscemu/instructions/instruction_set.py +++ b/riscemu/instructions/instruction_set.py @@ -8,7 +8,7 @@ from typing import Tuple, Callable, Dict from abc import ABC from ..CPU import CPU -from ..exceptions import ASSERT_LEN, ASSERT_IN +from riscemu.types.exceptions import ASSERT_LEN, ASSERT_IN from ..types import Instruction, Int32, UInt32 diff --git a/riscemu/parser.py b/riscemu/parser.py index cd9ece0..d91c40f 100644 --- a/riscemu/parser.py +++ b/riscemu/parser.py @@ -8,10 +8,10 @@ from typing import Dict, Tuple, Iterable, Callable, List from .assembler import MemorySectionType, ParseContext, AssemblerDirectives from .colors import FMT_PARSE -from .exceptions import ParseException from .helpers import Peekable from .tokenizer import Token, TokenType, tokenize from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction +from .types.exceptions import ParseException def parse_instruction(token: Token, args: Tuple[str], context: ParseContext): diff --git a/riscemu/priv/PrivRV32I.py b/riscemu/priv/PrivRV32I.py index 409f2ef..132c7fd 100644 --- a/riscemu/priv/PrivRV32I.py +++ b/riscemu/priv/PrivRV32I.py @@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT """ from ..instructions.RV32I import * -from ..exceptions import INS_NOT_IMPLEMENTED +from riscemu.types.exceptions import INS_NOT_IMPLEMENTED from .Exceptions import * from .privmodes import PrivModes from ..colors import FMT_CPU, FMT_NONE diff --git a/riscemu/tokenizer.py b/riscemu/tokenizer.py index e855b9d..18fa898 100644 --- a/riscemu/tokenizer.py +++ b/riscemu/tokenizer.py @@ -7,10 +7,10 @@ SPDX-License-Identifier: MIT import re from dataclasses import dataclass from enum import Enum, auto -from typing import List, Iterable, Optional -from riscemu.decoder import RISCV_REGS +from typing import List, Iterable -from .exceptions import ParseException +from riscemu.decoder import RISCV_REGS +from riscemu.types.exceptions import ParseException LINE_COMMENT_STARTERS = ('#', ';', '//') WHITESPACE_PATTERN = re.compile(r'\s+') diff --git a/riscemu/types/__init__.py b/riscemu/types/__init__.py index 7b46fc1..d703372 100644 --- a/riscemu/types/__init__.py +++ b/riscemu/types/__init__.py @@ -10,6 +10,7 @@ T_ParserOpts = Dict[str, any] NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$') +# base classes from .flags import MemoryFlags from .int32 import UInt32, Int32 from .instruction import Instruction @@ -22,5 +23,7 @@ from .simple_instruction import SimpleInstruction from .instruction_memory_section import InstructionMemorySection from .binary_data_memory_section import BinaryDataMemorySection - - +# exceptions +from .exceptions import ParseException, NumberFormatException, MemoryAccessException, OutOfMemoryException, \ + LinkerException, LaunchDebuggerException, RiscemuBaseException, InvalidRegisterException, \ + InvalidAllocationException, InvalidSyscallException, UnimplementedInstruction diff --git a/riscemu/types/binary_data_memory_section.py b/riscemu/types/binary_data_memory_section.py index 7f85ff1..86bdd77 100644 --- a/riscemu/types/binary_data_memory_section.py +++ b/riscemu/types/binary_data_memory_section.py @@ -1,5 +1,5 @@ from . import MemorySection, InstructionContext, MemoryFlags, T_RelativeAddress, Instruction -from ..exceptions import MemoryAccessException +from ..types.exceptions import MemoryAccessException class BinaryDataMemorySection(MemorySection): diff --git a/riscemu/exceptions.py b/riscemu/types/exceptions.py similarity index 98% rename from riscemu/exceptions.py rename to riscemu/types/exceptions.py index 3e95dc7..53af40a 100644 --- a/riscemu/exceptions.py +++ b/riscemu/types/exceptions.py @@ -5,11 +5,11 @@ SPDX-License-Identifier: MIT """ from abc import abstractmethod -from .colors import * +from ..colors import * import typing if typing.TYPE_CHECKING: - from .types import Instruction + from . import Instruction class RiscemuBaseException(BaseException): diff --git a/riscemu/types/instruction_context.py b/riscemu/types/instruction_context.py index 6d8678c..629b090 100644 --- a/riscemu/types/instruction_context.py +++ b/riscemu/types/instruction_context.py @@ -1,7 +1,7 @@ from collections import defaultdict from typing import Dict, List, Optional -from ..exceptions import ParseException +from .exceptions import ParseException from ..types import T_AbsoluteAddress, T_RelativeAddress, NUMBER_SYMBOL_PATTERN From 4ca475da69cd94dd26a20266ac13d6a19bc41269 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 20:22:11 +0200 Subject: [PATCH 21/30] improved the MMU.translate_address function --- riscemu/MMU.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/riscemu/MMU.py b/riscemu/MMU.py index 212c1f3..a455bd8 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -154,16 +154,30 @@ class MMU: bin = self.get_bin_containing(address) secs = set(sec.name for sec in bin.sections) if bin else [] + elf_markers = { + '__global_pointer$', '_fdata', '_etext', '_gp', + '_bss_start', '_bss_end', '_ftext', '_edata', '_end', '_fbss' + } def key(x): name, val = x - - if name in secs or val > address: - return float('inf') return address - val - name, val = min(sec.context.labels.items(), key=key, default=('.empty', None)) - if val is None: + best_fit = iter(sorted(filter(lambda x: x[1] <= address, sec.context.labels.items()), key=key)) + + best = ('', float('inf')) + for name, val in best_fit: + if address - val < best[1]: + best = (name, val) + if address - val == best[1]: + if best[0] in elf_markers: + best = (name, val) + elif best[0] in secs and name not in elf_markers: + best = (name, val) + + name, val = best + + if not name: return "unknown at 0x{:0x}".format(address) return str('{}:{} at {} (0x{:0x}) + 0x{:0x}'.format( @@ -274,6 +288,4 @@ class MMU: if owner: print("owned by: {}".format(owner[0])) - print("{}: 0x{:0x} + 0x{:0x}".format(name, val, addr - val)) - From b5e20ed39b060f3ae9a5fbea7f8a320813af7ae1 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 20:22:31 +0200 Subject: [PATCH 22/30] added docstrings to Int32 and UInt32 classes --- riscemu/types/int32.py | 59 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/riscemu/types/int32.py b/riscemu/types/int32.py index 2fa08b7..ab0f587 100644 --- a/riscemu/types/int32.py +++ b/riscemu/types/int32.py @@ -3,6 +3,14 @@ from ctypes import c_int32, c_uint32 class Int32: + """ + This class implements 32bit signed integers (see :class:`UInt32` for unsigned integers) + + It implements basically all mathematical dunder magic methods (__add__, __sub__, etc.) + + You can use it just like you would any other integer, just be careful when passing it + to functions which actually expect an integer and not a Int32. + """ _type = c_int32 __slots__ = ('_val',) @@ -158,25 +166,55 @@ class Int32: return self.__class__(other) ^ self @property - def value(self): + def value(self) -> int: + """ + The value represented by this Integer + :return: + """ return self._val.value def unsigned(self) -> 'UInt32': + """ + Convert to an unsigned representation. See :class:Uint32 + :return: + """ return UInt32(self) def to_bytes(self, bytes: int = 4) -> bytearray: + """ + Convert to a bytearray of length :param:bytes + + :param bytes: The length of the bytearray + :return: A little-endian representation of the contained integer + """ return bytearray(self.unsigned_value.to_bytes(bytes, 'little')) def signed(self) -> 'Int32': + """ + Convert to a signed representation. See :class:Int32 + :return: + """ if self.__class__ == Int32: return self return Int32(self) @property def unsigned_value(self): + """ + Return the value interpreted as an unsigned integer + :return: + """ return c_uint32(self.value).value - def shift_right_logical(self, ammount: Union['Int32', int]): + def shift_right_logical(self, ammount: Union['Int32', int]) -> 'Int32': + """ + This function implements logical right shifts, meaning that the sign bit is shifted as well. + + This is equivalent to (self.value % 0x100000000) >> ammount + + :param ammount: Number of positions to shift + :return: A new Int32 object representing the shifted value (keeps the signed-ness of the source) + """ if isinstance(ammount, Int32): ammount = ammount.value return self.__class__((self.value % 0x100000000) >> ammount) @@ -189,14 +227,27 @@ class Int32: class UInt32(Int32): + """ + An unsigned version of :class:Int32. + """ _type = c_uint32 def unsigned(self) -> 'UInt32': + """ + Return a new instance representing the same bytes, but signed + :return: + """ return self @property - def unsigned_value(self): + def unsigned_value(self) -> int: return self._val.value - def shift_right_logical(self, ammount: Union['Int32', int]): + def shift_right_logical(self, ammount: Union['Int32', int]) -> 'UInt32': + """ + see :meth:`Int32.shift_right_logical ` + + :param ammount: Number of positions to shift + :return: A new Int32 object representing the shifted value (keeps the signed-ness of the source) + """ return self >> ammount From 61540dfcb7eee4a699046dc30a66240e69ed4c3f Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 20:23:10 +0200 Subject: [PATCH 23/30] [docs] improved documentation build --- .readthedocs.yaml | 2 +- generate-docs.sh | 2 +- sphinx-docs/source/conf.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 2dc6d8d..efce204 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -12,7 +12,7 @@ sphinx: # Optionally set the version of Python and requirements required to build your docs python: - version: 3.7 + version: "3.7" system_packages: true install: - requirements: sphinx-docs/requirements.txt diff --git a/generate-docs.sh b/generate-docs.sh index 4e7e983..efb35a6 100755 --- a/generate-docs.sh +++ b/generate-docs.sh @@ -6,7 +6,7 @@ echo "Generating docs!" if ! command -v 'sphinx-apidoc'; then source venv/bin/activate - pip install sphinx + pip install -r sphinx-docs/requirements.txt fi diff --git a/sphinx-docs/source/conf.py b/sphinx-docs/source/conf.py index 9a5b294..c2b68af 100644 --- a/sphinx-docs/source/conf.py +++ b/sphinx-docs/source/conf.py @@ -24,7 +24,7 @@ if os.getenv('READTHEDOCS', False) and not os.path.exists('riscemu.rst'): # -- Project information ----------------------------------------------------- project = 'RiscEmu' -copyright = '2021, Anton Lydike' +copyright = '2022, Anton Lydike' author = 'Anton Lydike' # The full version, including alpha/beta/rc tags From 57f827ba6a4b4f4d6f56212154c1ab2022b4555f Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 20:24:26 +0200 Subject: [PATCH 24/30] updated version to 2.0.0a2 --- riscemu/__init__.py | 4 ++-- sphinx-docs/source/conf.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/riscemu/__init__.py b/riscemu/__init__.py index dc78b53..013d75e 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -24,5 +24,5 @@ from .config import RunConfig from .parser import tokenize, parse_tokens, AssemblyFileLoader __author__ = "Anton Lydike " -__copyright__ = "Copyright 2021 Anton Lydike" -__version__ = '2.0.0a1' +__copyright__ = "Copyright 2022 Anton Lydike" +__version__ = '2.0.0a2' diff --git a/sphinx-docs/source/conf.py b/sphinx-docs/source/conf.py index c2b68af..37d8b00 100644 --- a/sphinx-docs/source/conf.py +++ b/sphinx-docs/source/conf.py @@ -28,7 +28,7 @@ copyright = '2022, Anton Lydike' author = 'Anton Lydike' # The full version, including alpha/beta/rc tags -release = '0.1.0' +release = '2.0.0a2' # -- General configuration --------------------------------------------------- From fa4a9b92f31ad3d864454c87f3a57dcec569dd82 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 20:26:09 +0200 Subject: [PATCH 25/30] fixed imports in types/instruction_memory_section --- riscemu/types/instruction_memory_section.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscemu/types/instruction_memory_section.py b/riscemu/types/instruction_memory_section.py index 76553b0..1a2e8e0 100644 --- a/riscemu/types/instruction_memory_section.py +++ b/riscemu/types/instruction_memory_section.py @@ -1,7 +1,7 @@ from typing import List from . import MemorySection, Instruction, InstructionContext, MemoryFlags, T_RelativeAddress -from .. import MemoryAccessException +from .exceptions import MemoryAccessException class InstructionMemorySection(MemorySection): From b7f13651554a38ec3dd6b402dbd58ce0e55d5be0 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 22:00:22 +0200 Subject: [PATCH 26/30] ported syscalls to Int32 usage and removed unecessary prints --- README.md | 3 +-- riscemu/assembler.py | 1 - riscemu/syscall.py | 24 ++++++++++++------------ 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index e47bfcf..a8b2b83 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ or [riscemu.datenvorr.at](https://riscemu.datenvorr.at/index.html). This emulator contains: * RISC-V Assembly parser * RISC-V Assembly loader -* Emulation for parts of the basic RISC-V instruction set +* Emulation for most parts of the basic RISC-V instruction set and the M and A extensions * Naive memory emulator * Basic implementation of some syscalls * A debugging environment @@ -97,7 +97,6 @@ generate and make all doc files for you. Finally, you can open the docs locall b * RISC-V reference card: https://www.cl.cam.ac.uk/teaching/1617/ECAD+Arch/files/docs/RISCVGreenCardv8-20151013.pdf ## TODO: - * Currently registers don't enforce 32 bit (no overflows etc) * Correctly handle 12 and 20 bit immediate (currently not limited to bits at all) * Add a cycle limit to the options and CPU to catch infinite loops * Move away from `print` and use `logging.logger` instead diff --git a/riscemu/assembler.py b/riscemu/assembler.py index 793484e..59165bf 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -84,7 +84,6 @@ class ParseContext: base = 0 if self.section is not None: base = align_addr(self.section.current_address(), alignment) - print("base at {}".format(base)) self._finalize_section() self.section = CurrentSection(name, type, base) diff --git a/riscemu/syscall.py b/riscemu/syscall.py index e46c49a..c7a1c3c 100644 --- a/riscemu/syscall.py +++ b/riscemu/syscall.py @@ -52,7 +52,7 @@ class Syscall: ) def ret(self, code): - self.cpu.regs.set('a0', code) + self.cpu.regs.set('a0', Int32(code)) def get_syscall_symbols(): @@ -91,9 +91,9 @@ class SyscallInterface: read syscall (63): read from file no a0, into addr a1, at most a2 bytes on return a0 will be the number of read bytes or -1 if an error occured """ - fileno = scall.cpu.regs.get('a0') - addr = scall.cpu.regs.get('a1') - size = scall.cpu.regs.get('a2') + fileno = scall.cpu.regs.get('a0').unsigned_value + addr = scall.cpu.regs.get('a1').unsigned_value + size = scall.cpu.regs.get('a2').unsigned_value if fileno not in self.open_files: scall.cpu.regs.set('a0', -1) return @@ -113,9 +113,9 @@ class SyscallInterface: write syscall (64): write a2 bytes from addr a1 into fileno a0 on return a0 will hold the number of bytes written or -1 if an error occured """ - fileno = scall.cpu.regs.get('a0') - addr = scall.cpu.regs.get('a1') - size = scall.cpu.regs.get('a2') + fileno = scall.cpu.regs.get('a0').unsigned_value + addr = scall.cpu.regs.get('a1').unsigned_value + size = scall.cpu.regs.get('a2').unsigned_value if fileno not in self.open_files: return scall.ret(-1) @@ -147,9 +147,9 @@ class SyscallInterface: print(FMT_SYSCALL + '[Syscall] open: opening files not supported without scall-fs flag!' + FMT_NONE) return scall.ret(-1) - mode = scall.cpu.regs.get('a0') - addr = scall.cpu.regs.get('a1') - size = scall.cpu.regs.get('a2') + mode = scall.cpu.regs.get('a0').unsigned_value + addr = scall.cpu.regs.get('a1').unsigned_value + size = scall.cpu.regs.get('a2').unsigned_value mode_st = OPEN_MODES.get(mode, ) if mode_st == -1: @@ -176,7 +176,7 @@ class SyscallInterface: return -1 if an error was encountered, otherwise returns 0 """ - fileno = scall.cpu.regs.get('a0') + fileno = scall.cpu.regs.get('a0').unsigned_value if fileno not in self.open_files: print(FMT_SYSCALL + '[Syscall] close: unknown fileno {}!'.format(fileno) + FMT_NONE) return scall.ret(-1) @@ -191,7 +191,7 @@ class SyscallInterface: Exit syscall. Exits the system with status code a0 """ scall.cpu.halted = True - scall.cpu.exit_code = scall.cpu.regs.get('a0') + scall.cpu.exit_code = scall.cpu.regs.get('a0').value def __repr__(self): return "{}(\n\tfiles={}\n)".format( From c2b6385523c0a5adce5b4559958b74ab2761119a Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 22:10:20 +0200 Subject: [PATCH 27/30] version 2.0.0a3 --- riscemu/__init__.py | 2 +- riscemu/__main__.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/riscemu/__init__.py b/riscemu/__init__.py index 013d75e..0e6d850 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -25,4 +25,4 @@ from .parser import tokenize, parse_tokens, AssemblyFileLoader __author__ = "Anton Lydike " __copyright__ = "Copyright 2022 Anton Lydike" -__version__ = '2.0.0a2' +__version__ = '2.0.0a3' diff --git a/riscemu/__main__.py b/riscemu/__main__.py index 96ac45e..cc62114 100644 --- a/riscemu/__main__.py +++ b/riscemu/__main__.py @@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT This file holds the logic for starting the emulator from the CLI """ -from riscemu import RiscemuBaseException +from riscemu import RiscemuBaseException, __copyright__, __version__ from riscemu.CPU import UserModeCPU if __name__ == '__main__': @@ -18,6 +18,12 @@ if __name__ == '__main__': all_ins_names = list(InstructionSetDict.keys()) + if '--version' in sys.argv: + print("riscemu version {}\n{}\n\nAvailable ISA: {}".format( + __version__, __copyright__, + ", ".join(InstructionSetDict.keys()) + )) + sys.exit() class OptionStringAction(argparse.Action): def __init__(self, option_strings, dest, keys=None, omit_empty=False, **kwargs): From 4004c5ee6d26d86178dc4c114faa5a4169b9898e Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sun, 27 Mar 2022 23:50:28 +0200 Subject: [PATCH 28/30] squashing bugs related to Int32 wrapping and sign extension --- riscemu/instructions/RV32I.py | 24 ++++++++++++------------ riscemu/registers.py | 2 +- riscemu/types/int32.py | 24 ++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/riscemu/instructions/RV32I.py b/riscemu/instructions/RV32I.py index 50d2076..9955743 100644 --- a/riscemu/instructions/RV32I.py +++ b/riscemu/instructions/RV32I.py @@ -24,11 +24,11 @@ class RV32I(InstructionSet): def instruction_lb(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 1))) + self.regs.set(rd, Int32.sign_extend(self.mmu.read(addr.unsigned_value, 1), 8)) def instruction_lh(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 2))) + self.regs.set(rd, Int32.sign_extend(self.mmu.read(addr.unsigned_value, 2), 16)) def instruction_lw(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) @@ -36,23 +36,23 @@ class RV32I(InstructionSet): def instruction_lbu(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.regs.set(rd, UInt32(self.mmu.read(addr.unsigned_value, 1))) + self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 1))) def instruction_lhu(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.regs.set(rd, UInt32(self.mmu.read(addr.unsigned_value, 2))) + self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 2))) def instruction_sb(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.mmu.write(addr.value, 1, self.regs.get(rd).to_bytes(1)) + self.mmu.write(addr.unsigned_value, 1, self.regs.get(rd).to_bytes(1)) def instruction_sh(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.mmu.write(addr.value, 2, self.regs.get(rd).to_bytes(2)) + self.mmu.write(addr.unsigned_value, 2, self.regs.get(rd).to_bytes(2)) def instruction_sw(self, ins: 'Instruction'): rd, addr = self.parse_mem_ins(ins) - self.mmu.write(addr.value, 4, self.regs.get(rd).to_bytes(4)) + self.mmu.write(addr.unsigned_value, 4, self.regs.get(rd).to_bytes(4)) def instruction_sll(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 3) @@ -140,7 +140,7 @@ class RV32I(InstructionSet): def instruction_lui(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) - imm = UInt32(ins.get_imm(1)) << 12 + imm = UInt32(ins.get_imm(1) << 12) self.regs.set(reg, Int32(imm)) def instruction_auipc(self, ins: 'Instruction'): @@ -263,14 +263,14 @@ class RV32I(InstructionSet): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) addr = ins.get_imm(1) - self.regs.set(reg, self.pc) + self.regs.set(reg, Int32(self.pc)) self.pc = addr def instruction_jalr(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) addr = ins.get_imm(1) - self.regs.set(reg, self.pc) + self.regs.set(reg, Int32(self.pc)) self.pc = addr def instruction_ret(self, ins: 'Instruction'): @@ -307,13 +307,13 @@ class RV32I(InstructionSet): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) immediate = ins.get_imm(1) - self.regs.set(reg, immediate) + self.regs.set(reg, Int32(immediate)) def instruction_la(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) reg = ins.get_reg(0) immediate = ins.get_imm(1) - self.regs.set(reg, immediate) + self.regs.set(reg, Int32(immediate)) def instruction_mv(self, ins: 'Instruction'): ASSERT_LEN(ins.args, 2) diff --git a/riscemu/registers.py b/riscemu/registers.py index caa7e36..cce48c4 100644 --- a/riscemu/registers.py +++ b/riscemu/registers.py @@ -109,7 +109,7 @@ class Registers: if mark_set: self.last_set = reg # check 32 bit signed bounds - self.vals[reg] = val + self.vals[reg] = val.unsigned() return True def get(self, reg, mark_read=True) -> 'Int32': diff --git a/riscemu/types/int32.py b/riscemu/types/int32.py index ab0f587..1d61a85 100644 --- a/riscemu/types/int32.py +++ b/riscemu/types/int32.py @@ -16,7 +16,8 @@ class Int32: def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0): if isinstance(val, (bytes, bytearray)): - self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True)) + signed = len(val) == 4 and self._type == c_int32 + self._val = self.__class__._type(int.from_bytes(val, 'little', signed=signed)) elif isinstance(val, self.__class__._type): self._val = val elif isinstance(val, (c_uint32, c_int32, Int32)): @@ -187,7 +188,7 @@ class Int32: :param bytes: The length of the bytearray :return: A little-endian representation of the contained integer """ - return bytearray(self.unsigned_value.to_bytes(bytes, 'little')) + return bytearray(self.unsigned_value.to_bytes(4, 'little'))[0:bytes] def signed(self) -> 'Int32': """ @@ -225,6 +226,25 @@ class Int32: def __hex__(self): return hex(self.value) + @classmethod + def sign_extend(cls, data: Union[bytes, bytearray, int], bits: int): + """ + Create an instance of Int32 by sign extending :param:bits bits from :param:data + to 32 bits + + :param data: The source data + :param bits: The number of bits in the source data + :return: An instance of Int32, holding the sign-extended value + """ + if isinstance(data, (bytes, bytearray)): + data = int.from_bytes(data, 'little') + sign = data >> (bits - 1) + if sign > 1: + print("overflow in Int32.sext!") + if sign: + data = (data & (2 ** (bits - 1) - 1)) - 2**(bits-1) + return cls(data) + class UInt32(Int32): """ From 663721b30628c62d7a83d4dcf1dae92abc27e8a4 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Mon, 28 Mar 2022 00:04:50 +0200 Subject: [PATCH 29/30] pre-release 2.0.0a4 --- .gitignore | 1 + riscemu/__init__.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5fe8770..8834265 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ __pycache__ .mypy_cache dist/ riscemu.egg-info +build/ diff --git a/riscemu/__init__.py b/riscemu/__init__.py index 0e6d850..6e67892 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -25,4 +25,4 @@ from .parser import tokenize, parse_tokens, AssemblyFileLoader __author__ = "Anton Lydike " __copyright__ = "Copyright 2022 Anton Lydike" -__version__ = '2.0.0a3' +__version__ = '2.0.0a4' From fe4b3efb6feb4357e71bb67e0114f891ec08c56d Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Thu, 31 Mar 2022 22:46:23 +0200 Subject: [PATCH 30/30] fixes #10 - fixed how preconfigured memory is handled --- riscemu/assembler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/riscemu/assembler.py b/riscemu/assembler.py index 59165bf..44a7113 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -6,7 +6,8 @@ from .colors import FMT_PARSE, FMT_NONE from riscemu.types.exceptions import ParseException, ASSERT_LEN from .helpers import parse_numeric_argument, align_addr, get_section_base_name from .tokenizer import Token -from .types import Program, T_RelativeAddress, InstructionContext, Instruction, BinaryDataMemorySection, InstructionMemorySection +from .types import Program, T_RelativeAddress, InstructionContext, Instruction, BinaryDataMemorySection, \ + InstructionMemorySection, Int32 INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini') """ @@ -167,13 +168,13 @@ class AssemblerDirectives: cls.add_bytes(size, bytearray(size), context) @classmethod - def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext, unsigned=False): + def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext): ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data) if content is None: content = bytearray(size) if isinstance(content, int): - content = bytearray(content.to_bytes(size, 'little', signed=not unsigned)) + content = Int32(content).to_bytes(size) context.section.data += content