diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 2fcedd0..ce405ca 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -12,12 +12,11 @@ from typing import List, Type import riscemu from .config import RunConfig from .MMU import MMU -from .base import BinaryDataMemorySection from .colors import FMT_CPU, FMT_NONE from .debug import launch_debug_session from .exceptions import RiscemuBaseException, LaunchDebuggerException from .syscall import SyscallInterface, get_syscall_symbols -from .types import CPU, ProgramLoader, Int32 +from .types import CPU, ProgramLoader, Int32, BinaryDataMemorySection from .parser import AssemblyFileLoader if typing.TYPE_CHECKING: diff --git a/riscemu/assembler.py b/riscemu/assembler.py index 8e0fca0..121201a 100644 --- a/riscemu/assembler.py +++ b/riscemu/assembler.py @@ -2,12 +2,11 @@ from enum import Enum, auto from typing import List from typing import Optional, Tuple, Union -from .base import BinaryDataMemorySection, InstructionMemorySection from .colors import FMT_PARSE, FMT_NONE from .exceptions import ParseException, ASSERT_LEN from .helpers import parse_numeric_argument, align_addr, get_section_base_name from .tokenizer import Token -from .types import Program, T_RelativeAddress, InstructionContext, Instruction, UInt32, Int32 +from .types import Program, T_RelativeAddress, InstructionContext, Instruction, BinaryDataMemorySection, InstructionMemorySection INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini') """ diff --git a/riscemu/base.py b/riscemu/base.py deleted file mode 100644 index 474e4ed..0000000 --- a/riscemu/base.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -This file contains a base implementation of Instruction, and MemorySection. 
- -This aims to be a simple base, usable for everyone who needs the basic functionality, but doesn't -want to set up their own subtypes of Instruction and MemorySection -""" - -from typing import List, Tuple, Union -from .exceptions import MemoryAccessException -from .helpers import parse_numeric_argument -from .types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \ - T_AbsoluteAddress, Program - - -class SimpleInstruction(Instruction): - def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]], - context: InstructionContext, addr: T_RelativeAddress): - self.context = context - self.name = name - self.args = args - self.addr = addr - - def get_imm(self, num: int) -> int: - resolved_label = self.context.resolve_label(self.args[num], self.addr) - if resolved_label is None: - return parse_numeric_argument(self.args[num]) - return resolved_label - - def get_imm_reg(self, num: int) -> Tuple[int, str]: - return self.get_imm(num + 1), self.get_reg(num) - - def get_reg(self, num: int) -> str: - return self.args[num] - - -class InstructionMemorySection(MemorySection): - def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0): - self.name = name - self.base = base - self.context = context - self.size = len(instructions) * 4 - self.flags = MemoryFlags(True, True) - self.instructions = instructions - self.owner = owner - - def read(self, offset: T_RelativeAddress, size: int) -> bytearray: - raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read') - - def write(self, offset: T_RelativeAddress, size: int, data: bytearray): - raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write') - - def read_ins(self, offset: T_RelativeAddress) -> Instruction: - if offset % 4 != 0: - raise MemoryAccessException("Unaligned instruction 
fetch!", self.base + offset, 4, 'instruction fetch') - return self.instructions[offset // 4] - - -class BinaryDataMemorySection(MemorySection): - def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0, flags: MemoryFlags = None): - self.name = name - self.base = base - self.context = context - self.size = len(data) - self.flags = flags if flags is not None else MemoryFlags(False, False) - self.data = data - self.owner = owner - - def read(self, offset: T_RelativeAddress, size: int) -> bytearray: - if offset + size > self.size: - raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read') - return self.data[offset:offset + size] - - def write(self, offset: T_RelativeAddress, size: int, data: bytearray): - if offset + size > self.size: - raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write') - if len(data[0:size]) != size: - raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write') - self.data[offset:offset + size] = data[0:size] - - def read_ins(self, offset: T_RelativeAddress) -> Instruction: - raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self), - offset, 4, 'instruction fetch') diff --git a/riscemu/debug.py b/riscemu/debug.py index fd68f7b..5e4a6f4 100644 --- a/riscemu/debug.py +++ b/riscemu/debug.py @@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT """ import os.path -from .base import SimpleInstruction +from .types import SimpleInstruction from .helpers import * if typing.TYPE_CHECKING: diff --git a/riscemu/interactive.py b/riscemu/interactive.py index 71526f3..5b6b088 100644 --- a/riscemu/interactive.py +++ b/riscemu/interactive.py @@ -1,6 +1,5 @@ from riscemu import RunConfig -from riscemu.base import InstructionMemorySection, SimpleInstruction -from riscemu.types import InstructionContext, Program +from riscemu.types import InstructionMemorySection, SimpleInstruction, 
Program if __name__ == '__main__': from .CPU import UserModeCPU diff --git a/riscemu/parser.py b/riscemu/parser.py index f70cfa3..cd9ece0 100644 --- a/riscemu/parser.py +++ b/riscemu/parser.py @@ -3,17 +3,15 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -import os import re from typing import Dict, Tuple, Iterable, Callable, List -from .helpers import Peekable from .assembler import MemorySectionType, ParseContext, AssemblerDirectives -from .types import Program, T_ParserOpts, ProgramLoader from .colors import FMT_PARSE from .exceptions import ParseException +from .helpers import Peekable from .tokenizer import Token, TokenType, tokenize -from .base import SimpleInstruction +from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction def parse_instruction(token: Token, args: Tuple[str], context: ParseContext): diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index f7df7a9..e6ec7ef 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -191,6 +191,7 @@ class PrivCPU(CPU): trap = self.pending_traps.pop() # use the most recent trap if self.conf.verbosity > 0: print(FMT_CPU + "[CPU] taking trap {}!".format(trap) + FMT_NONE) + self.regs.dump_reg_a() if trap.priv != PrivModes.MACHINE: print(FMT_CPU + "[CPU] Trap not targeting machine mode encountered! - undefined behaviour!" 
+ FMT_NONE) diff --git a/riscemu/priv/types.py b/riscemu/priv/types.py index f42d030..7882bdd 100644 --- a/riscemu/priv/types.py +++ b/riscemu/priv/types.py @@ -4,12 +4,11 @@ from dataclasses import dataclass from functools import lru_cache from typing import Tuple, Dict, Set -from riscemu import MemoryAccessException -from riscemu.priv.Exceptions import InstructionAccessFault, InstructionAddressMisalignedTrap, LoadAccessFault -from riscemu.types import Instruction, InstructionContext, T_RelativeAddress, MemoryFlags, T_AbsoluteAddress -from riscemu.base import BinaryDataMemorySection from riscemu.colors import FMT_NONE, FMT_PARSE from riscemu.decoder import format_ins, RISCV_REGS, decode +from riscemu.priv.Exceptions import InstructionAccessFault, InstructionAddressMisalignedTrap, LoadAccessFault +from riscemu.types import Instruction, InstructionContext, T_RelativeAddress, MemoryFlags, T_AbsoluteAddress, \ + BinaryDataMemorySection @dataclass(frozen=True) diff --git a/riscemu/types.py b/riscemu/types.py deleted file mode 100644 index 4a4692f..0000000 --- a/riscemu/types.py +++ /dev/null @@ -1,656 +0,0 @@ -""" -RiscEmu (c) 2021 Anton Lydike - -SPDX-License-Identifier: MIT - -This file contains abstract base classes and types, bundling only the absolute basic functionality - -See base.py for some basic implementations of these classes -""" -import os -import re -import typing -from abc import ABC, abstractmethod -from collections import defaultdict -from ctypes import c_uint32, c_int32 -from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple, Set, Union, Iterator, Callable, Type - -from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_RED, FMT_BOLD -from .config import RunConfig -from .exceptions import ParseException -from .helpers import format_bytes, get_section_base_name -from .registers import Registers - -if typing.TYPE_CHECKING: - from .MMU import MMU - from .instructions.instruction_set import InstructionSet - -# 
define some base type aliases so we can keep track of absolute and relative addresses -T_RelativeAddress = int -T_AbsoluteAddress = int - -# parser options are just dictionaries with arbitrary values -T_ParserOpts = Dict[str, any] - -NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$') - - -class Int32: - _type = c_int32 - __slots__ = ('_val',) - - def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0): - if isinstance(val, (bytes, bytearray)): - self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True)) - elif isinstance(val, self.__class__._type): - self._val = val - elif isinstance(val, (c_uint32, c_int32, Int32)): - self._val = self.__class__._type(val.value) - elif isinstance(val, int): - self._val = self.__class__._type(val) - else: - raise RuntimeError( - "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val) - ) - - def __add__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - - return self.__class__(self._val.value + other) - - def __sub__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value - other) - - def __mul__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value * other) - - def __truediv__(self, other): - return self // other - - def __floordiv__(self, other): - if isinstance(other, Int32): - other = other.value - return self.__class__(self.value // other) - - def __mod__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value % other) - - def __and__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value & other) - - def __or__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return 
self.__class__(self._val.value | other) - - def __xor__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self._val.value ^ other) - - def __lshift__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self.value << other) - - def __rshift__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.__class__(self.value >> other) - - def __eq__(self, other: Union['Int32', int]): - if isinstance(other, Int32): - other = other.value - return self.value == other - - def __neg__(self): - return self.__class__(-self._val.value) - - def __abs__(self): - return self.__class__(abs(self.value)) - - def __bytes__(self): - return self.to_bytes(4) - - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, self.value) - - def __str__(self): - return str(self.value) - - def __format__(self, format_spec): - return self.value.__format__(format_spec) - - def __hash__(self): - return hash(self.value) - - def __gt__(self, other): - if isinstance(other, Int32): - other = other.value - return self.value > other - - def __lt__(self, other): - if isinstance(other, Int32): - other = other.value - return self.value < other - - def __le__(self, other): - if isinstance(other, Int32): - other = other.value - return self.value <= other - - def __ge__(self, other): - if isinstance(other, Int32): - other = other.value - return self.value >= other - - def __bool__(self): - return bool(self.value) - - def __cmp__(self, other): - if isinstance(other, Int32): - other = other.value - return self.value.__cmp__(other) - - # right handed binary operators - - def __radd__(self, other): - return self + other - - def __rsub__(self, other): - return self.__class__(other) - self - - def __rmul__(self, other): - return self * other - - def __rtruediv__(self, other): - return self.__class__(other) // self - - def __rfloordiv__(self, 
other): - return self.__class__(other) // self - - def __rmod__(self, other): - return self.__class__(other) % self - - def __rand__(self, other): - return self.__class__(other) & self - - def __ror__(self, other): - return self.__class__(other) | self - - def __rxor__(self, other): - return self.__class__(other) ^ self - - @property - def value(self): - return self._val.value - - def unsigned(self) -> 'UInt32': - return UInt32(self) - - def to_bytes(self, bytes: int = 4) -> bytearray: - return bytearray(self.unsigned_value.to_bytes(bytes, 'little')) - - def signed(self) -> 'Int32': - if self.__class__ == Int32: - return self - return Int32(self) - - @property - def unsigned_value(self): - return c_uint32(self.value).value - - def shift_right_logical(self, ammount: Union['Int32', int]): - if isinstance(ammount, Int32): - ammount = ammount.value - return self.__class__((self.value % 0x100000000) >> ammount) - - def __int__(self): - return self.value - - def __hex__(self): - return hex(self.value) - - -class UInt32(Int32): - _type = c_uint32 - - def unsigned(self) -> 'UInt32': - return self - - @property - def unsigned_value(self): - return self._val.value - - def shift_right_logical(self, ammount: Union['Int32', int]): - return self >> ammount - - -@dataclass(frozen=True) -class MemoryFlags: - read_only: bool - executable: bool - - def __repr__(self): - return "r{}{}".format( - '-' if self.read_only else 'w', - 'x' if self.executable else '-' - ) - - -class InstructionContext: - base_address: T_AbsoluteAddress - """ - The address where the instruction block is placed - """ - - labels: Dict[str, T_RelativeAddress] - """ - This dictionary maps all labels to their relative position of the instruction block - """ - - numbered_labels: Dict[str, List[T_RelativeAddress]] - """ - This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where - the label was placed - """ - - global_symbol_dict: Dict[str, T_AbsoluteAddress] 
- """ - A reference to the MMU's global symbol dictionary for access to global symbols - """ - - def __init__(self): - self.labels = dict() - self.numbered_labels = defaultdict(list) - self.base_address = 0 - self.global_symbol_dict = dict() - - def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]: - if NUMBER_SYMBOL_PATTERN.match(symbol): - if address_at is None: - raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol)) - - direction = symbol[-1] - values = self.numbered_labels.get(symbol[:-1], []) - if direction == 'b': - return max((addr + self.base_address for addr in values if addr < address_at), default=None) - else: - return min((addr + self.base_address for addr in values if addr > address_at), default=None) - else: - # if it's not a local symbol, try the globals - if symbol not in self.labels: - return self.global_symbol_dict.get(symbol, None) - # otherwise return the local symbol - return self.labels.get(symbol, None) - - -class Instruction(ABC): - name: str - args: tuple - - @abstractmethod - def get_imm(self, num: int) -> int: - """ - parse and get immediate argument - """ - pass - - @abstractmethod - def get_imm_reg(self, num: int) -> Tuple[int, str]: - """ - parse and get an argument imm(reg) - """ - pass - - @abstractmethod - def get_reg(self, num: int) -> str: - """ - parse and get an register argument - """ - pass - - def __repr__(self): - return "{} {}".format(self.name, ", ".join(self.args)) - - -@dataclass -class MemorySection(ABC): - name: str - flags: MemoryFlags - size: int - base: T_AbsoluteAddress - owner: str - context: InstructionContext - - @property - def end(self): - return self.base + self.size - - @abstractmethod - def read(self, offset: T_RelativeAddress, size: int) -> bytearray: - pass - - @abstractmethod - def write(self, offset: T_RelativeAddress, size: int, data: bytearray): - pass - - @abstractmethod - def read_ins(self, offset: 
T_RelativeAddress) -> Instruction: - pass - - def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex', - bytes_per_row: int = 16, rows: int = 10, group: int = 4): - if self.flags.executable: - bytes_per_row = 4 - highlight = None - if end is None: - end = min(start + (bytes_per_row * (rows // 2)), self.size - 1) - highlight = start - start = max(0, start - (bytes_per_row * (rows // 2))) - - if self.flags.executable: - print(FMT_MEM + "{}, viewing {} instructions:".format( - self, (end - start) // 4 - ) + FMT_NONE) - - for addr in range(start, end, 4): - if addr == highlight: - print(FMT_UNDERLINE + FMT_ORANGE, end='') - print("0x{:04x}: {}{}".format( - self.base + addr, self.read_ins(addr), FMT_NONE - )) - else: - print(FMT_MEM + "{}, viewing {} bytes:".format( - self, (end - start) - ) + FMT_NONE) - - aligned_end = end - (end % bytes_per_row) if end % bytes_per_row != 0 else end - - for addr in range(start, aligned_end, bytes_per_row): - hi_ind = (highlight - addr) // group if highlight is not None else -1 - print("0x{:04x}: {}{}".format( - self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE - )) - - if aligned_end != end: - hi_ind = (highlight - aligned_end) // group if highlight is not None else -1 - print("0x{:04x}: {}{}".format( - self.base + aligned_end, format_bytes( - self.read(aligned_end, end % bytes_per_row), fmt, group, hi_ind - ), FMT_NONE - )) - - def dump_all(self, *args, **kwargs): - self.dump(0, self.size, *args, **kwargs) - - def __repr__(self): - return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format( - self.__class__.__name__, - self.name, - self.base, - self.size, - self.flags, - self.owner - ) - - -class Program: - """ - This represents a collection of sections which together form an executable program - - When you want to create a program which can be located anywhere in memory, set base to None, - this signals the other components, that this is 
relocatable. Set the base of each section to - the offset in the program, and everything will be taken care of for you. - - """ - name: str - context: InstructionContext - global_labels: Set[str] - relative_labels: Set[str] - sections: List[MemorySection] - base: Optional[T_AbsoluteAddress] - is_loaded: bool - - @property - def size(self): - if len(self.sections) == 0: - return 0 - if self.base is None: - return self.sections[-1].base + self.sections[-1].size - return (self.sections[-1].base - self.base) + self.sections[-1].size - - def __init__(self, name: str, base: Optional[int] = None): - self.name = name - self.context = InstructionContext() - self.sections = [] - self.global_labels = set() - self.relative_labels = set() - self.base = base - self.is_loaded = False - - def add_section(self, sec: MemorySection): - # print a warning when a section is located before the programs base - if self.base is not None: - if sec.base < self.base: - print( - FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format( - sec, self.name, self.base - ) + FMT_NONE) - - self.sections.append(sec) - # keep section list ordered - self.sections.sort(key=lambda section: section.base) - - def __repr__(self): - return "{}(name={},sections={},base={})".format( - self.__class__.__name__, self.name, self.global_labels, - [s.name for s in self.sections], self.base - ) - - @property - def entrypoint(self): - if '_start' in self.context.labels: - return self.context.labels.get('_start') - if 'main' in self.context.labels: - return self.context.labels.get('main') - for sec in self.sections: - if get_section_base_name(sec.name) == '.text' and sec.flags.executable: - return sec.base - - def loaded_trigger(self, at_addr: T_AbsoluteAddress): - """ - This trigger is called when the binary is loaded and its final address in memory is determined - - This will do a small sanity check to prevent programs loading twice, or at addresses they don't - expect to be 
loaded. - - Then it will finalize all relative symbols defined in it to point to the correct addresses. - - :param at_addr: the address where the program will be located - """ - if self.is_loaded: - if at_addr != self.base: - raise RuntimeError("Program loaded twice at different addresses! This will probably break things!") - return - - if self.base is not None and self.base != at_addr: - print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, ' - 'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE) - - # check if we are relocating - if self.base != at_addr: - offset = at_addr if self.base is None else at_addr - self.base - - # move all sections by the offset - for sec in self.sections: - sec.base += offset - - # move all relative symbols by the offset - for name in self.relative_labels: - self.context.labels[name] += offset - - self.base = at_addr - self.context.base_address = at_addr - - -class ProgramLoader(ABC): - """ - A program loader is always specific to a given source file. It is a place to store all state - concerning the parsing and loading of that specific source file, including options. 
- """ - - def __init__(self, source_path: str, options: T_ParserOpts): - self.source_path = source_path - self.options = options - self.filename = os.path.split(self.source_path)[-1] - - @classmethod - @abstractmethod - def can_parse(cls, source_path: str) -> float: - """ - Return confidence that the file located at source_path - should be parsed and loaded by this loader - :param source_path: the path of the source file - :return: the confidence that this file belongs to this parser - """ - pass - - @classmethod - @abstractmethod - def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]: - """ - parse command line args into an options dictionary - - :param argv: the command line args list - :return: all remaining command line args and the parser options object - """ - pass - - @classmethod - def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader': - """ - Instantiate a loader for the given source file with the required arguments - - :param source_path: the path to the source file - :param options: the parsed options (guaranteed to come from this classes get_options method. 
- :return: An instance of a ProgramLoader for the spcified source - """ - return cls(source_path, options) - - @abstractmethod - def parse(self) -> Union[Program, Iterator[Program]]: - """ - - :return: - """ - pass - - -class CPU(ABC): - # static cpu configuration - INS_XLEN: int = 4 - - # housekeeping variables - regs: Registers - mmu: 'MMU' - pc: T_AbsoluteAddress - cycle: int - halted: bool - - # debugging context - debugger_active: bool - - # instruction information - instructions: Dict[str, Callable[[Instruction], None]] - instruction_sets: Set['InstructionSet'] - - # configuration - conf: RunConfig - - def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig): - self.mmu = mmu - self.regs = Registers() - self.conf = conf - - self.instruction_sets = set() - self.instructions = dict() - - for set_class in instruction_sets: - ins_set = set_class(self) - self.instructions.update(ins_set.load()) - self.instruction_sets.add(ins_set) - - self.halted = False - self.cycle = 0 - self.pc = 0 - self.debugger_active = False - - def run_instruction(self, ins: Instruction): - """ - Execute a single instruction - - :param ins: The instruction to execute - """ - if ins.name in self.instructions: - self.instructions[ins.name](ins) - else: - # this should never be reached, as unknown instructions are imparseable - raise RuntimeError("Unknown instruction: {}".format(ins)) - - def load_program(self, program: Program): - self.mmu.load_program(program) - - def __repr__(self): - """ - Returns a representation of the CPU and some of its state. 
- """ - return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format( - self.__class__.__name__, - self.pc, - self.cycle, - self.halted, - " ".join(s.name for s in self.instruction_sets) - ) - - @abstractmethod - def step(self, verbose=False): - pass - - @abstractmethod - def run(self, verbose=False): - pass - - def launch(self, program: Program, verbose: bool = False): - if program not in self.mmu.programs: - print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE) - return - - self.pc = program.entrypoint - self.run(verbose) - - @classmethod - @abstractmethod - def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]: - pass - - def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]: - return max(self.get_loaders(), key=lambda ld: ld.can_parse(file_name)) - - @property - def sections(self): - return self.mmu.sections - - @property - def programs(self): - return self.mmu.programs diff --git a/riscemu/types/__init__.py b/riscemu/types/__init__.py new file mode 100644 index 0000000..7b46fc1 --- /dev/null +++ b/riscemu/types/__init__.py @@ -0,0 +1,26 @@ +from typing import Dict +import re + +# define some base type aliases so we can keep track of absolute and relative addresses +T_RelativeAddress = int +T_AbsoluteAddress = int + +# parser options are just dictionaries with arbitrary values +T_ParserOpts = Dict[str, any] + +NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$') + +from .flags import MemoryFlags +from .int32 import UInt32, Int32 +from .instruction import Instruction +from .instruction_context import InstructionContext +from .memory_section import MemorySection +from .program import Program +from .program_loader import ProgramLoader +from .cpu import CPU +from .simple_instruction import SimpleInstruction +from .instruction_memory_section import InstructionMemorySection +from .binary_data_memory_section import BinaryDataMemorySection + + + diff --git a/riscemu/types/binary_data_memory_section.py 
b/riscemu/types/binary_data_memory_section.py new file mode 100644 index 0000000..7f85ff1 --- /dev/null +++ b/riscemu/types/binary_data_memory_section.py @@ -0,0 +1,29 @@ +from . import MemorySection, InstructionContext, MemoryFlags, T_RelativeAddress, Instruction +from ..exceptions import MemoryAccessException + + +class BinaryDataMemorySection(MemorySection): + def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0, flags: MemoryFlags = None): + self.name = name + self.base = base + self.context = context + self.size = len(data) + self.flags = flags if flags is not None else MemoryFlags(False, False) + self.data = data + self.owner = owner + + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + if offset + size > self.size: + raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read') + return self.data[offset:offset + size] + + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + if offset + size > self.size: + raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write') + if len(data[0:size]) != size: + raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write') + self.data[offset:offset + size] = data[0:size] + + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self), + offset, 4, 'instruction fetch') diff --git a/riscemu/types/cpu.py b/riscemu/types/cpu.py new file mode 100644 index 0000000..23de29c --- /dev/null +++ b/riscemu/types/cpu.py @@ -0,0 +1,107 @@ +import typing +from abc import ABC, abstractmethod +from typing import List, Type, Callable, Set, Dict + +from ..registers import Registers +from ..config import RunConfig +from ..colors import FMT_RED, FMT_NONE +from . 
import T_AbsoluteAddress, Instruction, Program, ProgramLoader + + +class CPU(ABC): + # static cpu configuration + INS_XLEN: int = 4 + + # housekeeping variables + regs: Registers + mmu: 'MMU' + pc: T_AbsoluteAddress + cycle: int + halted: bool + + # debugging context + debugger_active: bool + + # instruction information + instructions: Dict[str, Callable[[Instruction], None]] + instruction_sets: Set['InstructionSet'] + + # configuration + conf: RunConfig + + def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig): + self.mmu = mmu + self.regs = Registers() + self.conf = conf + + self.instruction_sets = set() + self.instructions = dict() + + for set_class in instruction_sets: + ins_set = set_class(self) + self.instructions.update(ins_set.load()) + self.instruction_sets.add(ins_set) + + self.halted = False + self.cycle = 0 + self.pc = 0 + self.debugger_active = False + + def run_instruction(self, ins: Instruction): + """ + Execute a single instruction + + :param ins: The instruction to execute + """ + if ins.name in self.instructions: + self.instructions[ins.name](ins) + else: + # this should never be reached, as unknown instructions are imparseable + raise RuntimeError("Unknown instruction: {}".format(ins)) + + def load_program(self, program: Program): + self.mmu.load_program(program) + + def __repr__(self): + """ + Returns a representation of the CPU and some of its state. + """ + return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format( + self.__class__.__name__, + self.pc, + self.cycle, + self.halted, + " ".join(s.name for s in self.instruction_sets) + ) + + @abstractmethod + def step(self, verbose=False): + pass + + @abstractmethod + def run(self, verbose=False): + pass + + def launch(self, program: Program, verbose: bool = False): + if program not in self.mmu.programs: + print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' 
+ FMT_NONE) + return + + self.pc = program.entrypoint + self.run(verbose) + + @classmethod + @abstractmethod + def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]: + pass + + def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]: + return max(self.get_loaders(), key=lambda ld: ld.can_parse(file_name)) + + @property + def sections(self): + return self.mmu.sections + + @property + def programs(self): + return self.mmu.programs diff --git a/riscemu/types/flags.py b/riscemu/types/flags.py new file mode 100644 index 0000000..7c1a7e7 --- /dev/null +++ b/riscemu/types/flags.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass + + +@dataclass(frozen=True) +class MemoryFlags: + read_only: bool + executable: bool + + def __repr__(self): + return "r{}{}".format( + '-' if self.read_only else 'w', + 'x' if self.executable else '-' + ) diff --git a/riscemu/types/instruction.py b/riscemu/types/instruction.py new file mode 100644 index 0000000..516d254 --- /dev/null +++ b/riscemu/types/instruction.py @@ -0,0 +1,31 @@ +from abc import ABC, abstractmethod +from typing import Tuple + + +class Instruction(ABC): + name: str + args: tuple + + @abstractmethod + def get_imm(self, num: int) -> int: + """ + parse and get immediate argument + """ + pass + + @abstractmethod + def get_imm_reg(self, num: int) -> Tuple[int, str]: + """ + parse and get an argument imm(reg) + """ + pass + + @abstractmethod + def get_reg(self, num: int) -> str: + """ + parse and get an register argument + """ + pass + + def __repr__(self): + return "{} {}".format(self.name, ", ".join(self.args)) diff --git a/riscemu/types/instruction_context.py b/riscemu/types/instruction_context.py new file mode 100644 index 0000000..6d8678c --- /dev/null +++ b/riscemu/types/instruction_context.py @@ -0,0 +1,53 @@ +from collections import defaultdict +from typing import Dict, List, Optional + +from ..exceptions import ParseException +from ..types import T_AbsoluteAddress, T_RelativeAddress, 
NUMBER_SYMBOL_PATTERN + + +class InstructionContext: + base_address: T_AbsoluteAddress + """ + The address where the instruction block is placed + """ + + labels: Dict[str, T_RelativeAddress] + """ + This dictionary maps all labels to their relative position of the instruction block + """ + + numbered_labels: Dict[str, List[T_RelativeAddress]] + """ + This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where + the label was placed + """ + + global_symbol_dict: Dict[str, T_AbsoluteAddress] + """ + A reference to the MMU's global symbol dictionary for access to global symbols + """ + + def __init__(self): + self.labels = dict() + self.numbered_labels = defaultdict(list) + self.base_address = 0 + self.global_symbol_dict = dict() + + def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]: + if NUMBER_SYMBOL_PATTERN.match(symbol): + if address_at is None: + raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol)) + + direction = symbol[-1] + values = self.numbered_labels.get(symbol[:-1], []) + if direction == 'b': + return max((addr + self.base_address for addr in values if addr < address_at), default=None) + else: + return min((addr + self.base_address for addr in values if addr > address_at), default=None) + else: + # if it's not a local symbol, try the globals + if symbol not in self.labels: + return self.global_symbol_dict.get(symbol, None) + # otherwise return the local symbol + return self.labels.get(symbol, None) + diff --git a/riscemu/types/instruction_memory_section.py b/riscemu/types/instruction_memory_section.py new file mode 100644 index 0000000..76553b0 --- /dev/null +++ b/riscemu/types/instruction_memory_section.py @@ -0,0 +1,27 @@ +from typing import List + +from . import MemorySection, Instruction, InstructionContext, MemoryFlags, T_RelativeAddress +from .. 
class Int32:
    """
    Signed 32 bit integer with wrap-around semantics, backed by ``ctypes.c_int32``.

    Every arithmetic and bitwise operator accepts either another ``Int32`` (or subclass) or a
    plain ``int`` and returns a new instance of the receiving object's class. Results that do
    not fit into 32 bits wrap around, because the value is always stored in a ctypes integer.
    """
    _type = c_int32
    __slots__ = ('_val',)

    def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0):
        """
        :param val: the initial value. ``bytes``/``bytearray`` are decoded as a little endian
                    signed integer; ctypes integers and other ``Int32`` instances are converted
                    through their ``.value``.
        :raises RuntimeError: if ``val`` has an unsupported type
        """
        if isinstance(val, (bytes, bytearray)):
            self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True))
        elif isinstance(val, self.__class__._type):
            # already the right ctypes type: stored as-is (not copied)
            self._val = val
        elif isinstance(val, (c_uint32, c_int32, Int32)):
            self._val = self.__class__._type(val.value)
        elif isinstance(val, int):
            self._val = self.__class__._type(val)
        else:
            raise RuntimeError(
                "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val)
            )

    # arithmetic operators

    def __add__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value

        return self.__class__(self._val.value + other)

    def __sub__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value - other)

    def __mul__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value * other)

    def __truediv__(self, other):
        # there are no fractional values, so true division is floor division
        return self // other

    def __floordiv__(self, other):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self.value // other)

    def __mod__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value % other)

    # bitwise operators

    def __and__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value & other)

    def __or__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value | other)

    def __xor__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value ^ other)

    def __lshift__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self.value << other)

    def __rshift__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self.value >> other)

    # unary operators and conversions

    def __neg__(self):
        return self.__class__(-self._val.value)

    def __abs__(self):
        return self.__class__(abs(self.value))

    def __bytes__(self):
        # bytes() requires __bytes__ to return an actual bytes object; to_bytes() yields a
        # bytearray, which would make bytes(x) fail with a TypeError
        return bytes(self.to_bytes(4))

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, self.value)

    def __str__(self):
        return str(self.value)

    def __format__(self, format_spec):
        return self.value.__format__(format_spec)

    def __hash__(self):
        return hash(self.value)

    def __bool__(self):
        return bool(self.value)

    def __int__(self):
        return self.value

    def __index__(self):
        # Python 3 uses __index__ (not __hex__) for hex(), bin(), oct() and indexing
        return self.value

    def __hex__(self):
        # kept for backwards compatibility with direct callers; hex(x) goes through __index__
        return hex(self.value)

    # comparisons

    def __eq__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.value == other

    def __gt__(self, other):
        if isinstance(other, Int32):
            other = other.value
        return self.value > other

    def __lt__(self, other):
        if isinstance(other, Int32):
            other = other.value
        return self.value < other

    def __le__(self, other):
        if isinstance(other, Int32):
            other = other.value
        return self.value <= other

    def __ge__(self, other):
        if isinstance(other, Int32):
            other = other.value
        return self.value >= other

    def __cmp__(self, other):
        """
        Three-way comparison: negative, zero or positive if self is smaller than, equal to or
        greater than ``other``.

        Python 3 never calls this implicitly and ``int`` has no ``__cmp__`` to delegate to, so
        the result is computed from the rich comparisons.
        """
        if isinstance(other, Int32):
            other = other.value
        return (self.value > other) - (self.value < other)

    # right handed binary operators

    def __radd__(self, other):
        return self + other

    def __rsub__(self, other):
        return self.__class__(other) - self

    def __rmul__(self, other):
        return self * other

    def __rtruediv__(self, other):
        return self.__class__(other) // self

    def __rfloordiv__(self, other):
        return self.__class__(other) // self

    def __rmod__(self, other):
        return self.__class__(other) % self

    def __rand__(self, other):
        return self.__class__(other) & self

    def __ror__(self, other):
        return self.__class__(other) | self

    def __rxor__(self, other):
        return self.__class__(other) ^ self

    @property
    def value(self):
        """
        The value as a plain python int, in the range of the backing ctypes type
        """
        return self._val.value

    def unsigned(self) -> 'UInt32':
        """
        Reinterpret the 32 bits as an unsigned integer
        """
        return UInt32(self)

    def to_bytes(self, bytes: int = 4) -> bytearray:
        """
        Encode the lowest ``bytes`` bytes of the value in little endian byte order.

        Requesting fewer than four bytes truncates to the low order bytes (as needed for byte
        and half-word stores) instead of raising an OverflowError; requesting more than four
        bytes zero-pads the result.

        :param bytes: number of bytes to produce
        :return: a bytearray of exactly ``bytes`` bytes
        """
        # drop everything above the requested width so int.to_bytes cannot overflow
        low_bits = self.unsigned_value & ((1 << (8 * bytes)) - 1)
        return bytearray(low_bits.to_bytes(bytes, 'little'))

    def signed(self) -> 'Int32':
        """
        Reinterpret the 32 bits as a signed integer (returns self if this already is an Int32)
        """
        if self.__class__ == Int32:
            return self
        return Int32(self)

    @property
    def unsigned_value(self):
        """
        The value reinterpreted as an unsigned 32 bit python int
        """
        return c_uint32(self.value).value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Shift right, filling the vacated high bits with zeros (instead of copies of the sign bit)

        :param ammount: number of bits to shift by
        """
        if isinstance(ammount, Int32):
            ammount = ammount.value
        # the modulo maps negative values onto their unsigned 32 bit representation first
        return self.__class__((self.value % 0x100000000) >> ammount)


class UInt32(Int32):
    """
    Unsigned 32 bit integer, backed by ``ctypes.c_uint32``. Shares all operators with Int32.
    """
    _type = c_uint32

    def unsigned(self) -> 'UInt32':
        return self

    @property
    def unsigned_value(self):
        return self._val.value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        # the value is never negative, so the ordinary right shift already is a logical shift
        return self >> ammount
class Program:
    """
    This represents a collection of sections which together form an executable program

    When you want to create a program which can be located anywhere in memory, set base to None,
    this signals the other components, that this is relocatable. Set the base of each section to
    the offset in the program, and everything will be taken care of for you.

    """
    name: str
    context: InstructionContext
    global_labels: Set[str]
    relative_labels: Set[str]
    sections: List[MemorySection]
    base: Optional[T_AbsoluteAddress]
    is_loaded: bool

    @property
    def size(self):
        """
        The number of bytes spanned from the program base to the end of the last section
        (0 for a program without sections). Relies on the section list being sorted by base.
        """
        if len(self.sections) == 0:
            return 0
        if self.base is None:
            return self.sections[-1].base + self.sections[-1].size
        return (self.sections[-1].base - self.base) + self.sections[-1].size

    def __init__(self, name: str, base: Optional[int] = None):
        """
        :param name: the name of the program
        :param base: the address the program expects to be loaded at, or None if it is relocatable
        """
        self.name = name
        self.context = InstructionContext()
        self.sections = []
        self.global_labels = set()
        self.relative_labels = set()
        self.base = base
        self.is_loaded = False

    def add_section(self, sec: MemorySection):
        """
        Add a section to the program, keeping the section list ordered by base address.

        Prints a warning if the section is placed before the program's base.

        :param sec: the section to add
        """
        # print a warning when a section is located before the programs base
        if self.base is not None:
            if sec.base < self.base:
                print(
                    FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format(
                        sec, self.name, self.base
                    ) + FMT_NONE)

        self.sections.append(sec)
        # keep section list ordered
        self.sections.sort(key=lambda section: section.base)

    def __repr__(self):
        # one placeholder per argument: previously the global labels were printed as
        # "sections", the section names as "base", and the actual base was dropped
        return "{}(name={},globals={},sections={},base={})".format(
            self.__class__.__name__, self.name, self.global_labels,
            [s.name for s in self.sections], self.base
        )

    @property
    def entrypoint(self):
        """
        The address execution should start at.

        This is the ``_start`` label if defined, otherwise the ``main`` label, otherwise the base
        of the first executable section whose base name is ``.text``. None if none of these exist.
        """
        if '_start' in self.context.labels:
            return self.context.labels.get('_start')
        if 'main' in self.context.labels:
            return self.context.labels.get('main')
        for sec in self.sections:
            if get_section_base_name(sec.name) == '.text' and sec.flags.executable:
                return sec.base
        return None

    def loaded_trigger(self, at_addr: T_AbsoluteAddress):
        """
        This trigger is called when the binary is loaded and its final address in memory is determined

        This will do a small sanity check to prevent programs loading twice, or at addresses they don't
        expect to be loaded.

        Then it will finalize all relative symbols defined in it to point to the correct addresses.

        :param at_addr: the address where the program will be located
        :raises RuntimeError: if the program is already marked as loaded at a different address
        """
        # NOTE(review): is_loaded is never set to True in this class, so this guard only works
        # if whoever loads the program sets the flag - confirm against the MMU
        if self.is_loaded:
            if at_addr != self.base:
                raise RuntimeError("Program loaded twice at different addresses! This will probably break things!")
            return

        if self.base is not None and self.base != at_addr:
            print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, '
                            'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE)

        # check if we are relocating
        if self.base != at_addr:
            # a relocatable program (base None) has section bases that are offsets into the
            # program, so they move by the full load address
            offset = at_addr if self.base is None else at_addr - self.base

            # move all sections by the offset
            for sec in self.sections:
                sec.base += offset

            # move all relative symbols by the offset
            for name in self.relative_labels:
                self.context.labels[name] += offset

        self.base = at_addr
        self.context.base_address = at_addr
+ """ + + def __init__(self, source_path: str, options: T_ParserOpts): + self.source_path = source_path + self.options = options + self.filename = os.path.split(self.source_path)[-1] + + @classmethod + @abstractmethod + def can_parse(cls, source_path: str) -> float: + """ + Return confidence that the file located at source_path + should be parsed and loaded by this loader + :param source_path: the path of the source file + :return: the confidence that this file belongs to this parser + """ + pass + + @classmethod + @abstractmethod + def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]: + """ + parse command line args into an options dictionary + + :param argv: the command line args list + :return: all remaining command line args and the parser options object + """ + pass + + @classmethod + def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader': + """ + Instantiate a loader for the given source file with the required arguments + + :param source_path: the path to the source file + :param options: the parsed options (guaranteed to come from this classes get_options method. + :return: An instance of a ProgramLoader for the spcified source + """ + return cls(source_path, options) + + @abstractmethod + def parse(self) -> Union[Program, Iterator[Program]]: + """ + + :return: + """ + pass diff --git a/riscemu/types/simple_instruction.py b/riscemu/types/simple_instruction.py new file mode 100644 index 0000000..59d7b6c --- /dev/null +++ b/riscemu/types/simple_instruction.py @@ -0,0 +1,26 @@ +from typing import Union, Tuple + +from . 
class SimpleInstruction(Instruction):
    """
    A basic Instruction implementation which keeps its arguments as strings and resolves
    immediates through the labels of its InstructionContext.
    """

    def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]],
                 context: InstructionContext, addr: T_RelativeAddress):
        """
        :param name: the name of the instruction
        :param args: the (up to three) arguments of the instruction, as strings
        :param context: the context used to resolve labels
        :param addr: the address of this instruction, passed along when resolving labels
        """
        self.name = name
        self.args = args
        self.context = context
        self.addr = addr

    def get_imm(self, num: int) -> int:
        """
        Get argument ``num`` as an immediate: the address of the label it names if the context
        can resolve it, otherwise the argument parsed as a number.
        """
        raw_arg = self.args[num]
        label_addr = self.context.resolve_label(raw_arg, self.addr)
        if label_addr is not None:
            return label_addr
        return parse_numeric_argument(raw_arg)

    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        Get a register and immediate pair: the register is argument ``num``, the immediate is
        argument ``num + 1``. Returned as (immediate, register).
        """
        immediate = self.get_imm(num + 1)
        register = self.get_reg(num)
        return immediate, register

    def get_reg(self, num: int) -> str:
        """
        Get argument ``num`` as a register name (the argument string, unchanged)
        """
        return self.args[num]