diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 3df7d29..46a10ce 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -9,12 +9,12 @@ on them. import sys from typing import Tuple, List, Dict, Callable, Type -from .Executable import MemoryFlags -from .Syscall import SyscallInterface, get_syscall_symbols -from .Exceptions import RiscemuBaseException, LaunchDebuggerException +from .base_types import MemoryFlags +from .syscall import SyscallInterface, get_syscall_symbols +from .exceptions import RiscemuBaseException, LaunchDebuggerException from .MMU import MMU -from .Config import RunConfig -from .Registers import Registers +from .config import RunConfig +from .registers import Registers from .debug import launch_debug_session from .colors import FMT_CPU, FMT_NONE, FMT_ERROR @@ -23,7 +23,7 @@ import riscemu import typing if typing.TYPE_CHECKING: - from . import Executable, LoadedExecutable, LoadedInstruction + from . import base_types, LoadedExecutable, LoadedInstruction from .instructions.InstructionSet import InstructionSet @@ -78,7 +78,7 @@ class CPU: """ return RiscVTokenizer(tokenizer_input, self.all_instructions()) - def load(self, e: riscemu.Executable): + def load(self, e: riscemu.base_types): """ Load an executable into Memory """ diff --git a/riscemu/Executable.py b/riscemu/Executable.py deleted file mode 100644 index ed48eb5..0000000 --- a/riscemu/Executable.py +++ /dev/null @@ -1,319 +0,0 @@ -""" -RiscEmu (c) 2021 Anton Lydike - -SPDX-License-Identifier: MIT - -This file holds Executable and LoadedExecutable classes as well as loading and some linking code. - -FIXME: refactor this code into muliple files -""" - -from dataclasses import dataclass, field -from typing import Dict, List, Tuple, Union, Optional -from .Exceptions import * -from .helpers import * -from math import log - -import typing - -if typing.TYPE_CHECKING: - from .Tokenizer import RiscVInstructionToken - - -@dataclass(frozen=True) -class MemoryFlags: - read_only: bool - executable: bool - - def __repr__(self): - return "{}({},{})".format( - self.__class__.__name__, - 'ro' if self.read_only else 'rw', - 'x' if self.executable else '-' - ) - - -@dataclass -class MemorySection: - name: str - flags: MemoryFlags - size: int = 0 - content: List[bytearray] = field(default_factory=list) - - def add(self, data: bytearray): - self.content.append(data) - self.size += len(data) - - def continuous_content(self, parent: 'LoadedExecutable'): - """ - converts the content into one continuous bytearray - """ - if self.size == 0: - return bytearray(0) - content = self.content[0] - for b in self.content[1:]: - content += b - return content - - -@dataclass -class InstructionMemorySection(MemorySection): - content: List['RiscVInstructionToken'] = field(default_factory=list) - - def add_insn(self, insn: 'RiscVInstructionToken'): - self.content.append(insn) - self.size += 1 - - def continuous_content(self, parent: 'LoadedExecutable'): - return [ - LoadedInstruction(ins.instruction, ins.args, parent) - for ins in self.content - ] - - -@dataclass() -class Executable: - run_ptr: Tuple[str, int] - sections: Dict[str, MemorySection] - symbols: Dict[str, Tuple[str, int]] - exported_symbols: List[str] - name: str - - def __repr__(self): - return "{}(sections = {}, symbols = {}, run_ptr = {}, globals={})".format( - self.__class__.__name__, - " ".join(self.sections.keys()), - " ".join(self.symbols.keys()), - self.run_ptr, - ",".join(self.exported_symbols) - ) - - -### LOADING CODE - - -@dataclass(frozen=True) -class LoadedInstruction: - """ - An instruction which is loaded into memory. It knows the binary it belongs to to resolve symbols - """ - name: str - args: List[str] - bin: 'LoadedExecutable' - - def get_imm(self, num: int): - """ - parse and get immediate argument - """ - if len(self.args) <= num: - raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args)) - arg = self.args[num] - # look up symbols - if self.bin.has_symb(arg): - return self.bin.lookup_symbol(arg) - return parse_numeric_argument(arg) - - def get_imm_reg(self, num: int): - """ - parse and get an argument imm(reg) - """ - if len(self.args) <= num: - raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args)) - arg = self.args[num] - ASSERT_IN("(", arg) - imm, reg = arg[:-1].split("(") - if self.bin.has_symb(imm): - return self.bin.lookup_symbol(imm), reg - return parse_numeric_argument(imm), reg - - def get_reg(self, num: int): - """ - parse and get an register argument - """ - if len(self.args) <= num: - raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args)) - return self.args[num] - - def __repr__(self): - return "{} {}".format(self.name, ", ".join(self.args)) - - -@dataclass(frozen=True) -class LoadedMemorySection: - """ - A section which is loaded into memory - """ - name: str - base: int - size: int - content: Union[List[LoadedInstruction], bytearray] = field(repr=False) - flags: MemoryFlags - owner: str - - def read(self, offset: int, size: int): - if offset < 0: - raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, size, 'read') - if offset + size > self.size: - raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset, - size, 'read') - return self.content[offset: offset + size] - - def read_instruction(self, offset): - if not self.flags.executable: - raise MemoryAccessException('Section not executable!', self.base + offset, 1, 'read exec') - - if offset < 0: - raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'read exec') - if offset >= self.size: - raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset, - 1, 'read exec') - return self.content[offset] - - def write(self, offset, size, data): - if self.flags.read_only: - raise MemoryAccessException('Section not writeable {}'.format(self.name), self.base + offset, size, 'write') - - if offset < 0: - raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'write') - if offset >= self.size: - raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset, - size, 'write') - - for i in range(size): - self.content[offset + i] = data[i] - - def dump(self, at_addr=None, fmt='hex', max_rows=10, group=4, bytes_per_row=16, all=False): - highlight = -1 - if at_addr is None: - at_addr = self.base - else: - highlight = at_addr - self.base - - at_off = at_addr - self.base - start = max(align_addr(at_off - ((max_rows * bytes_per_row) // 2), 8) - 8, 0) - if all: - end = self.size - start = 0 - else: - end = min(start + (max_rows * bytes_per_row), self.size) - - fmt_str = " 0x{:0" + str(ceil(log(self.base + end, 16))) + "X}: {}" - - if self.flags.executable: - # this section holds instructions! - start = 0 if all else max(at_off - (max_rows // 2), 0) - end = self.size if all else min(self.size, start + max_rows) - print(FMT_MEM + "{}, viewing {} instructions:".format( - self, end - start - ) + FMT_NONE) - for i in range(start, end): - if i == highlight: - ins = FMT_UNDERLINE + FMT_ORANGE + repr(self.content[i]) + FMT_NONE - else: - ins = repr(self.content[i]) - print(fmt_str.format(self.base + i, ins)) - else: - print(FMT_MEM + "{}, viewing {} bytes:".format( - self, end - start - ) + FMT_NONE) - for i in range(0, end - start, bytes_per_row): - data = self.content[start + i: min(start + i + bytes_per_row, end)] - if start + i <= highlight <= start + i + bytes_per_row: - # do hightlight here! - hi_ind = (highlight - start - i) // group - print(fmt_str.format(self.base + start + i, format_bytes(data, fmt, group, highlight=hi_ind))) - else: - print(fmt_str.format(self.base + start + i, format_bytes(data, fmt, group))) - if end == self.size: - print(FMT_MEM + "End of section!" + FMT_NONE) - else: - print(FMT_MEM + "More bytes ..." + FMT_NONE) - - def __repr__(self): - return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format( - self.__class__.__name__, - self.name, - self.base, - self.size, - self.flags, - self.owner - ) - - -class LoadedExecutable: - """ - This represents an executable which is loaded into memory at address base_addr - - This is basicalle the "loader" in normal system environments - It initializes the stack and heap - - It still holds a symbol table, that is not accessible memory since I don't want to deal with - binary strings in memory etc. - """ - name: str - base_addr: int - sections_by_name: Dict[str, LoadedMemorySection] - sections: List[LoadedMemorySection] - symbols: Dict[str, int] - run_ptr: int - exported_symbols: Dict[str, int] - global_symbol_table: Dict[str, int] - - def __init__(self, exe: Executable, base_addr: int, global_symbol_table: Dict[str, int]): - self.name = exe.name - self.base_addr = base_addr - self.sections = list() - self.sections_by_name = dict() - self.symbols = dict() - self.exported_symbols = dict() - self.global_symbol_table = global_symbol_table - - curr = base_addr - for sec in exe.sections.values(): - loaded_sec = LoadedMemorySection( - sec.name, - curr, - sec.size, - sec.continuous_content(self), - sec.flags, - self.name - ) - self.sections.append(loaded_sec) - self.sections_by_name[loaded_sec.name] = loaded_sec - curr = align_addr(loaded_sec.size + curr) - - for name, (sec_name, offset) in exe.symbols.items(): - if sec_name == '_static_': - self.symbols[name] = offset - else: - ASSERT_IN(sec_name, self.sections_by_name) - self.symbols[name] = self.sections_by_name[sec_name].base + offset - - for name in exe.exported_symbols: - self.exported_symbols[name] = self.symbols[name] - - self.size = curr - base_addr - - # translate run_ptr from executable - run_ptr_sec, run_ptr_off = exe.run_ptr - self.run_ptr = self.sections_by_name[run_ptr_sec].base + run_ptr_off - - def lookup_symbol(self, name): - if name in self.symbols: - return self.symbols[name] - if name in self.global_symbol_table: - return self.global_symbol_table[name] - raise LinkerException('Symbol {} not found!'.format(name), (self,)) - - def __repr__(self): - return '{}[{}](base=0x{:08X}, size={}bytes, sections={}, run_ptr=0x{:08X})'.format( - self.__class__.__name__, - self.name, - self.base_addr, - self.size, - " ".join(self.sections_by_name.keys()), - self.run_ptr - ) - - def has_symb(self, arg): - return arg in self.symbols or arg in self.global_symbol_table diff --git a/riscemu/ExecutableParser.py b/riscemu/ExecutableParser.py deleted file mode 100644 index 3e18c3d..0000000 --- a/riscemu/ExecutableParser.py +++ /dev/null @@ -1,193 +0,0 @@ -""" -RiscEmu (c) 2021 Anton Lydike - -SPDX-License-Identifier: MIT - -This file holds the parser that parses the tokenizer output. -""" - -from .helpers import parse_numeric_argument, int_to_bytes -from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags -from .Exceptions import * - -from .Tokenizer import tokenize, TokenType, Token, COMMA, NEWLINE - -from typing import Dict, Tuple, List, Optional - - -class ExecutableParser: - """ - Parses output form the RiscVTokenizer - """ - tokenizer: 'RiscVTokenizer' - - def __init__(self, tokenizer: 'RiscVTokenizer'): - self.instructions: List['RiscVInstructionToken'] = list() - self.symbols: Dict[str, Tuple[str, int]] = dict() - self.sections: Dict[str, MemorySection] = dict() - self.tokenizer = tokenizer - self.active_section: Optional[str] = None - self.implicit_sections = False - self.globals: List[str] = list() - - def parse(self) -> Executable: - """ - parse tokenizer output into an executable - :return: the parsed executable - :raise ParseException: Raises a ParseException when invalid input is read - """ - for token in self.tokenizer.tokens: - if isinstance(token, 'RiscVInstructionToken'): - self.parse_instruction(token) - elif isinstance(token, 'RiscVSymbolToken'): - self.handle_symbol(token) - elif isinstance(token, 'RiscVPseudoOpToken'): - self.handle_pseudo_op(token) - return self._get_execuable() - - def _get_execuable(self) -> Executable: - start_ptr = ('text', 0) - if '_start' in self.symbols: - start_ptr = self.symbols['_start'] - elif 'main' in self.symbols: - start_ptr = self.symbols['main'] - return Executable(start_ptr, self.sections, self.symbols, self.globals, self.tokenizer.name) - - def parse_instruction(self, ins: 'RiscVInstructionToken') -> None: - """ - parses an Instruction token - :param ins: the instruction token - """ - if self.active_section is None: - self.op_text() - self.implicit_sections = True - - ASSERT_EQ(self.active_section, 'text') - sec = self._curr_sec() - if isinstance(sec, InstructionMemorySection): - sec.add_insn(ins) - else: - raise ParseException("SHOULD NOT BE REACHED") - - def handle_symbol(self, token: 'RiscVSymbolToken'): - """ - Handle a symbol token (such as 'main:') - :param token: the symbol token - """ - ASSERT_NOT_IN(token.name, self.symbols) - ASSERT_NOT_NULL(self.active_section) - sec_pos = self._curr_sec().size - self.symbols[token.name] = (self.active_section, sec_pos) - - def handle_pseudo_op(self, op: 'RiscVPseudoOpToken'): - """ - Handle a pseudo op token (such as '.word 0xffaabbcc') - :param op: the peseudo-op token - """ - name = 'op_' + op.name - if hasattr(self, name): - getattr(self, name)(op) - else: - raise ParseException("Unknown pseudo op: {}".format(op), (op,)) - - ## Pseudo op implementations: - def op_section(self, op: 'RiscVPseudoOpToken'): - """ - handles a .section token - :param op: The token - """ - ASSERT_LEN(op.args, 1) - name = op.args[0][1:] - ASSERT_IN(name, ('data', 'rodata', 'text')) - getattr(self, 'op_' + name)(op) - - def op_text(self, op: 'RiscVPseudoOpToken' = None): - """ - handles a .text token - :param op: The token - """ - self._set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection) - - def op_data(self, op: 'RiscVPseudoOpToken' = None): - """ - handles a .data token - :param op: The token - """ - self._set_sec('data', MemoryFlags(read_only=False, executable=False)) - - def op_rodata(self, op: 'RiscVPseudoOpToken' = None): - """ - handles a .rodata token - :param op: The token - """ - self._set_sec('rodata', MemoryFlags(read_only=True, executable=False)) - - def op_space(self, op: 'RiscVPseudoOpToken'): - """ - handles a .space token. Inserts empty space into the current (data or rodata) section - :param op: The token - """ - ASSERT_IN(self.active_section, ('data', 'rodata')) - ASSERT_LEN(op.args, 1) - size = parse_numeric_argument(op.args[0]) - self._curr_sec().add(bytearray(size)) - - def op_ascii(self, op: 'RiscVPseudoOpToken'): - """ - handles a .ascii token. Inserts ascii encoded text into the currrent data section - :param op: The token - """ - ASSERT_IN(self.active_section, ('data', 'rodata')) - ASSERT_LEN(op.args, 1) - str = op.args[0][1:-1].encode('ascii').decode('unicode_escape') - self._curr_sec().add(bytearray(str, 'ascii')) - - def op_asciiz(self, op: 'RiscVPseudoOpToken'): - """ - handles a .ascii token. Inserts nullterminated ascii encoded text into the currrent data section - :param op: The token - """ - ASSERT_IN(self.active_section, ('data', 'rodata')) - ASSERT_LEN(op.args, 1) - str = op.args[0][1:-1].encode('ascii').decode('unicode_escape') - self._curr_sec().add(bytearray(str + '\0', 'ascii')) - - def op_global(self, op: 'RiscVPseudoOpToken'): - """ - handles a .global token. Marks the token as global - :param op: The token - """ - ASSERT_LEN(op.args, 1) - name = op.args[0] - self.globals.append(name) - - def op_set(self, op: 'RiscVPseudoOpToken'): - """ - handles a .set name, val token. Sets the symbol name to val - :param op: The token - """ - ASSERT_LEN(op.args, 2) - name = op.args[0] - val = parse_numeric_argument(op.args[1]) - self.symbols[name] = ('_static_', val) - - def op_align(self, op: 'RiscVPseudoOpToken'): - """ - handles an align token. Currently a nop (just not implemented fully yet, as linker handles most alignement tasks) - :param op: The token - """ - pass - - def op_word(self, op: 'RiscVPseudoOpToken'): - ASSERT_LEN(op.args, 1) - val = parse_numeric_argument(op.args[0]) - self._curr_sec().add(int_to_bytes(val, 4)) - - ## Section handler code - def _set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection): - if name not in self.sections: - self.sections[name] = cls(name, flags) - self.active_section = name - - def _curr_sec(self): - return self.sections[self.active_section] diff --git a/riscemu/MMU.py b/riscemu/MMU.py index f7ca534..c255eb4 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -4,10 +4,10 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -from .Config import RunConfig -from .Executable import Executable, LoadedExecutable, LoadedMemorySection, LoadedInstruction, MemoryFlags +from .base_types import InstructionContext, Instruction, MemorySection, MemoryFlags, T_RelativeAddress, T_AbsoluteAddress, \ + Program from .helpers import align_addr, int_from_bytes -from .Exceptions import OutOfMemoryException, InvalidAllocationException +from .exceptions import OutOfMemoryException, InvalidAllocationException from .colors import * from typing import Dict, List, Tuple, Optional @@ -27,19 +27,14 @@ class MMU: No single allocation can be bigger than 64 MB """ - sections: List[LoadedMemorySection] + sections: List[MemorySection] """ A list of all loaded memory sections """ - binaries: List[LoadedExecutable] + programs: List[Program] """ - A list of all loaded executables - """ - - last_bin: Optional[LoadedExecutable] = None - """ - The last loaded executable (the next executable is inserted directly after this one) + A list of all loaded programs """ global_symbols: Dict[str, int] @@ -47,79 +42,14 @@ class MMU: The global symbol table """ - last_ins_sec: Optional[LoadedMemorySection] - - def __init__(self, conf: RunConfig): - """ - Create a new MMU, respecting the active RunConfiguration - - :param conf: The config to respect - """ - self.sections: List[LoadedMemorySection] = list() - self.binaries: List[LoadedExecutable] = list() - self.first_free_addr: int = 0x100 - self.conf: RunConfig = conf - self.global_symbols: Dict[str, int] = dict() - self.last_ins_sec = None - - def load_bin(self, exe: Executable) -> LoadedExecutable: - """ - Load an executable into memory - - :param exe: the executable to load - :return: A LoadedExecutable - :raises OutOfMemoryException: When all memory is used - """ - - # align to 8 byte word - addr = align_addr(self.first_free_addr) - - loaded_bin = LoadedExecutable(exe, addr, self.global_symbols) - - if loaded_bin.size + addr > self.max_size: - raise OutOfMemoryException('load of executable') - - self.binaries.append(loaded_bin) - self.first_free_addr = loaded_bin.base_addr + loaded_bin.size - - # read sections into sec dict - for sec in loaded_bin.sections: - self.sections.append(sec) - - self.global_symbols.update(loaded_bin.exported_symbols) - - print(FMT_MEM + "[MMU] Successfully loaded{}: {}".format(FMT_NONE, loaded_bin)) - - return loaded_bin - - def allocate_section(self, name: str, req_size: int, flag: MemoryFlags): + def __init__(self): """ - Used to allocate a memory region (data only). Use `load_bin` if you want to load a binary, this is used for - stack and maybe malloc in the future. - - :param name: Name of the section to allocate - :param req_size: The requested size - :param flag: The flags protecting this memory section - :return: The LoadedMemorySection + Create a new MMU """ - if flag.executable: - raise InvalidAllocationException('cannot allocate executable section', name, req_size, flag) - - if req_size < 0: - raise InvalidAllocationException('Invalid size request', name, req_size, flag) + self.sections = list() + self.global_symbols = dict() - if req_size > self.max_alloc_size: - raise InvalidAllocationException('Cannot allocate more than {} bytes at a time'.format(self.max_alloc_size), - name, req_size, flag) - - base = align_addr(self.first_free_addr) - size = align_addr(req_size) - sec = LoadedMemorySection(name, base, size, bytearray(size), flag, "") - self.sections.append(sec) - self.first_free_addr = base + size - return sec - - def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]: + def get_sec_containing(self, addr: T_AbsoluteAddress) -> Optional[MemorySection]: """ Returns the section that contains the address addr @@ -131,29 +61,25 @@ class MMU: return sec return None - def get_bin_containing(self, addr: int) -> Optional[LoadedExecutable]: + def get_bin_containing(self, addr: T_AbsoluteAddress) -> Optional[Program]: for exe in self.binaries: if exe.base_addr <= addr < exe.base_addr + exe.size: return exe return None - def read_ins(self, addr: int) -> LoadedInstruction: + def read_ins(self, addr: T_AbsoluteAddress) -> Instruction: """ Read a single instruction located at addr :param addr: The location :return: The Instruction """ - sec = self.last_ins_sec - if sec is not None and sec.base <= addr < sec.base + sec.size: - return sec.read_instruction(addr - sec.base) sec = self.get_sec_containing(addr) - self.last_ins_sec = sec if sec is None: print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! " "Have you forgotten an exit syscall or ret statement?" + FMT_NONE) raise RuntimeError("No next instruction available!") - return sec.read_instruction(addr - sec.base) + return sec.read_ins(addr - sec.base) def read(self, addr: int, size: int) -> bytearray: """ @@ -164,6 +90,9 @@ class MMU: :return: The bytearray at addr """ sec = self.get_sec_containing(addr) + if sec is None: + print(FMT_MEM + "[MMU] Trying to read data form invalid region at 0x{:x}! ".format(addr) + FMT_NONE) + raise RuntimeError("Reading from uninitialized memory region!") return sec.read(addr - sec.base, size) def write(self, addr: int, size: int, data): @@ -176,7 +105,7 @@ class MMU: """ sec = self.get_sec_containing(addr) if sec is None: - print(FMT_MEM + '[MMU] Invalid write into non-initialized section at 0x{:08X}'.format(addr) + FMT_NONE) + print(FMT_MEM + '[MMU] Invalid write into non-initialized region at 0x{:08X}'.format(addr) + FMT_NONE) raise RuntimeError("No write pls") return sec.write(addr - sec.base, size, data) @@ -195,7 +124,7 @@ class MMU: return sec.dump(addr, *args, **kwargs) - def symbol(self, symb: str): + def label(self, symb: str): """ Look up the symbol symb in all local symbol tables (and the global one) @@ -204,9 +133,9 @@ class MMU: print(FMT_MEM + "[MMU] Lookup for symbol {}:".format(symb) + FMT_NONE) if symb in self.global_symbols: print(" Found global symbol {}: 0x{:X}".format(symb, self.global_symbols[symb])) - for b in self.binaries: - if symb in b.symbols: - print(" Found local symbol {}: 0x{:X} in {}".format(symb, b.symbols[symb], b.name)) + for section in self.sections: + if symb in section.context.labels: + print(" Found local labels {}: 0x{:X} in {}".format(symb, section.context.labels[symb], section.name)) def read_int(self, addr: int) -> int: return int_from_bytes(self.read(addr, 4)) diff --git a/riscemu/__init__.py b/riscemu/__init__.py index fc080cf..d580e2b 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -8,21 +8,19 @@ This package aims at providing an all-round usable RISC-V emulator and debugger It contains everything needed to run assembly files, so you don't need any custom compilers or toolchains """ -from .Exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ +from .exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException -from .Executable import Executable, LoadedExecutable, LoadedMemorySection - -from .ExecutableParser import ExecutableParser +from .base_types import Executable, LoadedExecutable, LoadedMemorySection from .instructions import * from .MMU import MMU -from .Registers import Registers -from .Syscall import SyscallInterface, Syscall +from .registers import Registers +from .syscall import SyscallInterface, Syscall from .CPU import CPU -from .Config import RunConfig +from .config import RunConfig __author__ = "Anton Lydike " __copyright__ = "Copyright 2021 Anton Lydike" diff --git a/riscemu/assembler.py b/riscemu/assembler.py new file mode 100644 index 0000000..c8c7546 --- /dev/null +++ b/riscemu/assembler.py @@ -0,0 +1,169 @@ +from typing import Optional, Tuple, Union +from enum import Enum, auto +from typing import Optional, Tuple, Union + +from helpers import parse_numeric_argument +from .base_types import Program, T_RelativeAddress, InstructionContext +from .colors import FMT_PARSE, FMT_NONE +from .exceptions import ParseException +from .helpers import ASSERT_LEN +from .tokenizer import Token +from .types import BinaryDataMemorySection, InstructionMemorySection + + +INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini') + + +class MemorySectionType(Enum): + Data = auto() + Instructions = auto() + + +class CurrentSection: + name: str + data: Union[list, bytearray] + type: MemorySectionType + + def current_address(self) -> T_RelativeAddress: + if self.type == MemorySectionType.Data: + return len(self.data) + return len(self.data) * 4 + + def __repr__(self): + return "{}(name={},data={},type={})".format( + self.__class__.__name__, self.name, + self.data, self.type.name + ) + + +class ParseContext: + section: Optional[CurrentSection] + context: InstructionContext + program: Program + + def __init__(self, name: str): + self.program = Program(name) + self.context = self.program.context + self.section = None + + def finalize(self) -> Program: + self.finalize_section() + return self.program + + def finalize_section(self): + if self.section is None: + return + if self.section.type == MemorySectionType.Data: + section = BinaryDataMemorySection(self.section.data, self.section.name, self.context) + self.program.add_section(section) + elif self.section.type == MemorySectionType.Instructions: + section = InstructionMemorySection(self.section.data, self.section.name, self.context) + self.program.add_section(section) + + def __repr__(self): + return "{}(\n\tsetion={},\n\tprogram={}\n)".format( + self.__class__.__name__, self.section, self.program + ) + + +def ASSERT_IN_SECTION_TYPE(context: ParseContext, type: MemorySectionType): + if context.section is None: + raise ParseException('Error, expected to be in {} section, but no section is present...'.format(type.name)) + if context.section.type != type: + raise ParseException( + 'Error, expected to be in {} section, but currently in {}...'.format(type.name, context.section) + ) + + +def get_section_base_name(section_name: str) -> str: + return '.' + section_name.split('.')[1] + + +class AssemblerDirectives: + """ + This class represents a collection of all assembler directives as documented by + https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md#pseudo-ops + + All class methods prefixed with op_ are directly used as assembler directives. + """ + + @classmethod + def op_align(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 1) + ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data) + align_to = parse_numeric_argument(args[0]) + current_mod = context.section.current_address() % align_to + if current_mod == 0: + return + context.section.data += bytearray(align_to - current_mod) + + @classmethod + def op_section(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 1) + context.finalize_section() + + if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES: + context.section.type = MemorySectionType.Instructions + context.section.data = list() + else: + context.section.type = MemorySectionType.Data + context.section.data = bytearray() + context.section.name = args[0] + + @classmethod + def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 1) + context.program.global_labels.add(args[0]) + + @classmethod + def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 2) + name = args[0] + value = parse_numeric_argument(args[1]) + context.context.labels[name] = value + + @classmethod + def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext): + ASSERT_LEN(args, 1) + ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data) + size = parse_numeric_argument(args[0]) + cls.add_bytes(size, bytearray(size), context) + + @classmethod + def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext): + ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data) + + if content is None: + content = bytearray(size) + + @classmethod + def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True): + encoded_bytes = bytearray(text.encode('ascii')) + if zero_terminate: + encoded_bytes += bytearray(1) + cls.add_bytes(len(encoded_bytes), encoded_bytes, context) + + @classmethod + def handle_instruction(cls, token: Token, args: Tuple[str], context: ParseContext): + op = token.value[1:] + if hasattr(cls, 'op_' + op): + getattr(cls, 'op_' + op)(token, args, context) + elif op in ('text', 'data', 'rodata', 'bss', 'sbss'): + cls.op_section(token, (token.value,), context) + elif op in ('string', 'asciiz', 'asciz', 'ascii'): + ASSERT_LEN(args, 1) + cls.add_text(args[0], context, op == 'ascii') + elif op in DATA_OP_SIZES: + size = DATA_OP_SIZES[op] + for arg in args: + cls.add_bytes(size, parse_numeric_argument(arg), context) + else: + print(FMT_PARSE + "Unknown assembler directive: {} {} in {}".format(token, args, context) + FMT_NONE) + + +DATA_OP_SIZES = { + 'byte': 1, + '2byte': 2, 'half': 2, 'short': 2, + '4byte': 4, 'word': 4, 'long': 4, + '8byte': 8, 'dword': 8, 'quad': 8, +} diff --git a/riscemu/base_types.py b/riscemu/base_types.py new file mode 100644 index 0000000..0bf92b7 --- /dev/null +++ b/riscemu/base_types.py @@ -0,0 +1,186 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: MIT + +This file contains base classes which represent loaded programs +""" + +import re +from abc import ABC +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple, Set +from collections import defaultdict + +from .helpers import * + +T_RelativeAddress = int +T_AbsoluteAddress = int + +NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$') + + +@dataclass(frozen=True) +class MemoryFlags: + read_only: bool + executable: bool + + def __repr__(self): + return "{}({},{})".format( + self.__class__.__name__, + 'ro' if self.read_only else 'rw', + 'x' if self.executable else '-' + ) + + +class InstructionContext: + base_address: T_AbsoluteAddress + """ + The address where the instruction block is placed + """ + + labels: Dict[str, T_RelativeAddress] + """ + This dictionary maps all labels to their relative position of the instruction block + """ + numbered_labels: Dict[str, List[T_RelativeAddress]] + """ + This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where + the label was placed + """ + + def __init__(self): + self.labels = dict() + self.numbered_labels = defaultdict(list) + self.base_address = 0 + + def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_RelativeAddress]: + if NUMBER_SYMBOL_PATTERN.match(symbol): + if address_at is None: + raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol)) + + direction = symbol[-1] + if direction == 'b': + return max([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr < address_at], + default=None) + else: + return min([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr > address_at], + default=None) + else: + return self.labels.get(symbol, None) + + +class Instruction(ABC): + name: str + args: tuple + + @abstractmethod + def get_imm(self, num: int) -> int: + """ + parse and get immediate argument + """ + pass + + @abstractmethod + def get_imm_reg(self, num: int) -> Tuple[int, str]: + """ + parse and get an argument imm(reg) + """ + pass + + @abstractmethod + def get_reg(self, num: int) -> str: + """ + parse and get an register argument + """ + pass + + def __repr__(self): + return "{} {}".format(self.name, ", ".join(self.args)) + + +@dataclass +class MemorySection(ABC): + name: str + flags: MemoryFlags + size: int + base: T_AbsoluteAddress + owner: str + context: InstructionContext + + @abstractmethod + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + pass + + @abstractmethod + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + pass + + @abstractmethod + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + pass + + def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress], fmt: str = 'hex', + bytes_per_row: int = 16, rows: int = 10, group: int = 4): + if self.flags.executable: + bytes_per_row = 4 + highlight = None + if end is None: + end = start + (bytes_per_row * (rows // 2)) + highlight = start + start = start - (bytes_per_row * (rows // 2)) + if self.flags.executable: + print(FMT_MEM + "{}, viewing {} instructions:".format( + self, (end - start) // 4 + ) + FMT_NONE) + + for addr in range(start, end, 4): + if addr == highlight: + print(FMT_UNDERLINE + FMT_ORANGE, end='') + print("0x{:x}: {}{}".format( + self.base + addr, self.read_ins(addr), FMT_NONE + )) + else: + print(FMT_MEM + "{}, viewing {} bytes:".format( + self, (end - start) + ) + FMT_NONE) + + for addr in range(start, end, bytes_per_row): + hi_ind = (highlight - addr) // group + print("0x{:x}: {}{}".format( + self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE + )) + + def __repr__(self): + return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format( + self.__class__.__name__, + self.name, + self.base, + self.size, + self.flags, + self.owner + ) + + +class Program: + name: str + context: InstructionContext + global_labels: Set[str] + sections: List[MemorySection] + base: T_AbsoluteAddress = 0 + + def __init__(self, name: str, base: int = 0): + self.name = name + self.context = InstructionContext() + self.sections = [] + self.base = base + self.global_labels = set() + + def add_section(self, sec: MemorySection): + self.sections.append(sec) + + def __repr__(self): + return "{}(name={},context={},globals={},sections={},base={})".format( + self.__class__.__name__, self.name, self.context, self.global_labels, + [s.name for s in self.sections], self.base + ) diff --git a/riscemu/Config.py b/riscemu/config.py similarity index 85% rename from riscemu/Config.py rename to riscemu/config.py index 40ee2c5..7182958 100644 --- a/riscemu/Config.py +++ b/riscemu/config.py @@ -10,7 +10,7 @@ from typing import Optional @dataclass(frozen=True, init=True) class RunConfig: - stack_size: int = 8 * 1024 * 64 # for 8KB stack + stack_size: int = 8 * 1024 * 64 # for 8KB stack include_scall_symbols: bool = True add_accept_imm: bool = False # debugging @@ -21,3 +21,5 @@ class RunConfig: scall_fs: bool = False verbosity: int = 0 + +CONFIG = RunConfig() diff --git a/riscemu/debug.py b/riscemu/debug.py index 930dcbb..8350526 100644 --- a/riscemu/debug.py +++ b/riscemu/debug.py @@ -5,9 +5,9 @@ SPDX-License-Identifier: MIT """ import typing -from .Registers import Registers +from .registers import Registers from .colors import FMT_DEBUG, FMT_NONE -from .Executable import LoadedInstruction +from .base_types import Instruction from .helpers import * if typing.TYPE_CHECKING: @@ -50,7 +50,7 @@ def launch_debug_session(cpu: 'CPU', mmu: 'MMU', reg: 'Registers', prompt=""): return bin = mmu.get_bin_containing(cpu.pc) - ins = LoadedInstruction(name, list(args), bin) + ins = Instruction(name, list(args), bin) print(FMT_DEBUG + "Running instruction " + ins + FMT_NONE) cpu.run_instruction(ins) diff --git a/riscemu/Exceptions.py b/riscemu/exceptions.py similarity index 97% rename from riscemu/Exceptions.py rename to riscemu/exceptions.py index cfe4be8..b75b358 100644 --- a/riscemu/Exceptions.py +++ b/riscemu/exceptions.py @@ -7,11 +7,9 @@ SPDX-License-Identifier: MIT import typing from abc import abstractmethod +from .base_types import Instruction from .colors import * -if typing.TYPE_CHECKING: - from .Executable import LoadedInstruction - class RiscemuBaseException(BaseException): @abstractmethod @@ -116,7 +114,7 @@ class InvalidAllocationException(RiscemuBaseException): class UnimplementedInstruction(RiscemuBaseException): - def __init__(self, ins: 'LoadedInstruction'): + def __init__(self, ins: Instruction): self.ins = ins def message(self): diff --git a/riscemu/helpers.py b/riscemu/helpers.py index 8becedd..ad8ac06 100644 --- a/riscemu/helpers.py +++ b/riscemu/helpers.py @@ -5,7 +5,8 @@ SPDX-License-Identifier: MIT """ from math import log10, ceil -from .Exceptions import * +from .exceptions import * +from typing import Iterable, Iterator, TypeVar, Generic, List def align_addr(addr: int, to_bytes: int = 8) -> int: @@ -105,3 +106,36 @@ def bind_twos_complement(val): elif val > 2147483647: return val - 4294967296 return val + + +T = TypeVar('T') + + +class Peekable(Generic[T], Iterator[T]): + def __init__(self, iterable: Iterable[T]): + self.iterable = iter(iterable) + self.cache: List[T] = list() + + def __iter__(self) -> Iterator[T]: + return self + + def __next__(self) -> T: + if self.cache: + return self.cache.pop() + return next(self.iterable) + + def peek(self) -> T: + try: + if self.cache: + return self.cache[0] + pop = next(self.iterable) + self.cache.append(pop) + return pop + except StopIteration: + return None + + def push_back(self, item: T): + self.cache = [item] + self.cache + + def is_empty(self) -> bool: + return self.peek() is None diff --git a/riscemu/instructions/InstructionSet.py b/riscemu/instructions/InstructionSet.py index 6b55e7d..6666dc9 100644 --- a/riscemu/instructions/InstructionSet.py +++ b/riscemu/instructions/InstructionSet.py @@ -9,7 +9,7 @@ from typing import Tuple, Callable, Dict from abc import ABC from ..CPU import CPU from ..helpers import ASSERT_LEN, ASSERT_IN, to_unsigned -from ..Executable import LoadedInstruction +from ..base_types import LoadedInstruction class InstructionSet(ABC): diff --git a/riscemu/instructions/RV32A.py b/riscemu/instructions/RV32A.py index 9432c83..3de2383 100644 --- a/riscemu/instructions/RV32A.py +++ b/riscemu/instructions/RV32A.py @@ -1,5 +1,5 @@ from .InstructionSet import InstructionSet, LoadedInstruction -from ..Exceptions import INS_NOT_IMPLEMENTED +from ..exceptions import INS_NOT_IMPLEMENTED from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed diff --git a/riscemu/instructions/RV32I.py b/riscemu/instructions/RV32I.py index dcefb07..cb19af6 100644 --- a/riscemu/instructions/RV32I.py +++ b/riscemu/instructions/RV32I.py @@ -9,9 +9,9 @@ from .InstructionSet import * from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed from ..colors import FMT_DEBUG, FMT_NONE from ..debug import launch_debug_session -from ..Exceptions import LaunchDebuggerException -from ..Syscall import Syscall -from ..Executable import LoadedInstruction +from ..exceptions import LaunchDebuggerException +from ..syscall import Syscall +from ..base_types import LoadedInstruction class RV32I(InstructionSet): diff --git a/riscemu/instructions/RV32M.py b/riscemu/instructions/RV32M.py index bd0490f..5b1412f 100644 --- a/riscemu/instructions/RV32M.py +++ b/riscemu/instructions/RV32M.py @@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT """ from .InstructionSet import * -from ..Exceptions import INS_NOT_IMPLEMENTED +from ..exceptions import INS_NOT_IMPLEMENTED class RV32M(InstructionSet): diff --git a/riscemu/parser.py b/riscemu/parser.py new file mode 100644 index 0000000..64cefe8 --- /dev/null +++ b/riscemu/parser.py @@ -0,0 +1,79 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: MIT +""" +import re +from typing import Dict, Tuple, Iterable, Callable + +from helpers import Peekable +from .assembler import MemorySectionType, ParseContext, AssemblerDirectives +from .base_types import Program +from .colors import FMT_PARSE +from .exceptions import ParseException +from .tokenizer import Token, TokenType +from .types import SimpleInstruction + + +def parse_instruction(token: Token, args: Tuple[str], context: ParseContext): + if context.section is None or context.section.type != MemorySectionType.Instructions: + raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context)) + ins = SimpleInstruction(token.value, args, context.context, context.section.current_address()) + context.section.data.append(ins) + + +def parse_label(token: Token, args: Tuple[str], context: ParseContext): + name = token.value[:-1] + if re.match(r'^\d+$', name): + # relative label: + context.context.numbered_labels[name].append(context.section.current_address()) + else: + if name in context.context.labels: + print(FMT_PARSE + 'Warn: Symbol {} defined twice!'.format(name)) + context.context.labels[name] = context.section.current_address() + + +PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = { + TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction, + TokenType.LABEL: parse_label, + TokenType.INSTRUCTION_NAME: parse_instruction +} + + +def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program: + context = ParseContext(name) + + for token, args in composite_tokenizer(Peekable[Token](tokens_iter)): + if token.type not in PARSERS: + raise ParseException("Unexpected token type: {}, {}".format(token, args)) + PARSERS[token.type](token, args, context) + + return context.finalize() + + +def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]: + tokens: Peekable[Token] = Peekable[Token](tokens_iter) + + while not tokens.is_empty(): + token = next(tokens) + if token.type in (TokenType.PSEUDO_OP, TokenType.LABEL, TokenType.INSTRUCTION_NAME): + yield token, tuple(take_arguments(tokens)) + + +def take_arguments(tokens: Peekable[Token]) -> Iterable[str]: + """ + Consumes (argument comma)* and yields argument.value until newline is reached + If an argument is not followed by either a newline or a comma, a parse exception is raised + The newline at the end is consumed + :param tokens: A Peekable iterator over some Tokens + """ + while True: + if tokens.peek().type == TokenType.ARGUMENT: + yield next(tokens).value + if tokens.peek().type == TokenType.COMMA: + next(tokens) + elif tokens.peek().type == TokenType.NEWLINE: + next(tokens) + break + raise ParseException("Expected newline, instead got {}".format(tokens.peek())) + diff --git a/riscemu/priv/ElfLoader.py b/riscemu/priv/ElfLoader.py index 886da8a..3a4bc33 100644 --- a/riscemu/priv/ElfLoader.py +++ b/riscemu/priv/ElfLoader.py @@ -2,8 +2,8 @@ from dataclasses import dataclass from typing import List, Dict, Tuple from .Exceptions import * -from ..Exceptions import RiscemuBaseException -from ..Executable import MemoryFlags, LoadedMemorySection +from ..exceptions import RiscemuBaseException +from ..base_types import MemoryFlags, LoadedMemorySection from ..decoder import decode, RISCV_REGS, format_ins from ..helpers import FMT_PARSE, FMT_NONE, FMT_GREEN, FMT_BOLD diff --git a/riscemu/priv/ImageLoader.py b/riscemu/priv/ImageLoader.py index 36bbdce..1e89eee 100644 --- a/riscemu/priv/ImageLoader.py +++ b/riscemu/priv/ImageLoader.py @@ -8,8 +8,8 @@ from typing import Dict, List, Optional, TYPE_CHECKING from .ElfLoader import ElfInstruction, ElfLoadedMemorySection, InstructionAccessFault, InstructionAddressMisalignedTrap from .PrivMMU import PrivMMU -from ..Config import RunConfig -from ..Executable import LoadedMemorySection, MemoryFlags +from ..config import RunConfig +from ..base_types import LoadedMemorySection, MemoryFlags from ..IO.IOModule import IOModule from ..colors import FMT_ERROR, FMT_NONE, FMT_MEM from ..decoder import decode @@ -117,7 +117,7 @@ class MemoryImageMMU(PrivMMU): return "{}{:+x} ({}:{})".format(sym, addr - val, sec.owner, sec.name) return "{}:{}{:+x}".format(sec.owner, sec.name, addr - sec.base) - def symbol(self, symb: str): + def label(self, symb: str): print(FMT_MEM + "Looking up symbol {}".format(symb)) for owner, symbs in self.debug_info['symbols'].items(): if symb in symbs: diff --git a/riscemu/priv/PrivCPU.py b/riscemu/priv/PrivCPU.py index 043eb85..5297060 100644 --- a/riscemu/priv/PrivCPU.py +++ b/riscemu/priv/PrivCPU.py @@ -15,7 +15,7 @@ from ..IO import TextIO from ..instructions import RV32A, RV32M if typing.TYPE_CHECKING: - from riscemu import Executable, LoadedExecutable, LoadedInstruction + from riscemu import base_types, LoadedExecutable, LoadedInstruction from riscemu.instructions.InstructionSet import InstructionSet @@ -95,7 +95,7 @@ class PrivCPU(CPU): print() print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE) - def load(self, e: riscemu.Executable): + def load(self, e: riscemu.base_types): raise NotImplementedError("Not supported!") def run_loaded(self, le: 'riscemu.LoadedExecutable'): diff --git a/riscemu/priv/PrivRV32I.py b/riscemu/priv/PrivRV32I.py index ca91f37..278767d 100644 --- a/riscemu/priv/PrivRV32I.py +++ b/riscemu/priv/PrivRV32I.py @@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT """ from ..instructions.RV32I import * -from ..Exceptions import INS_NOT_IMPLEMENTED +from ..exceptions import INS_NOT_IMPLEMENTED from .Exceptions import * from .privmodes import PrivModes from ..colors import FMT_CPU, FMT_NONE diff --git a/riscemu/Registers.py b/riscemu/registers.py similarity index 98% rename from riscemu/Registers.py rename to riscemu/registers.py index df282c1..a3de09a 100644 --- a/riscemu/Registers.py +++ b/riscemu/registers.py @@ -4,10 +4,10 @@ RiscEmu (c) 2021 Anton Lydike SPDX-License-Identifier: MIT """ -from .Config import RunConfig +from .config import RunConfig from .helpers import * from collections import defaultdict -from .Exceptions import InvalidRegisterException +from .exceptions import InvalidRegisterException class Registers: """ diff --git a/riscemu/Syscall.py b/riscemu/syscall.py similarity index 100% rename from riscemu/Syscall.py rename to riscemu/syscall.py diff --git a/riscemu/Tokenizer.py b/riscemu/tokenizer.py similarity index 94% rename from riscemu/Tokenizer.py rename to riscemu/tokenizer.py index db9d330..6e29dd7 100644 --- a/riscemu/Tokenizer.py +++ b/riscemu/tokenizer.py @@ -10,15 +10,16 @@ from enum import Enum, auto from typing import List, Iterable from riscemu.decoder import RISCV_REGS -from .Exceptions import ParseException +from .exceptions import ParseException LINE_COMMENT_STARTERS = ('#', ';', '//') WHITESPACE_PATTERN = re.compile(r'\s+') -MEMORY_ADDRESS_PATTERN = re.compile('^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$') +MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$') REGISTER_NAMES = RISCV_REGS I = lambda x: x + class TokenType(Enum): COMMA = auto() ARGUMENT = auto() @@ -40,6 +41,7 @@ class Token: return ', ' return '{}({}) '.format(self.type.name[0:3], self.value) + NEWLINE = Token(TokenType.NEWLINE, '\n') COMMA = Token(TokenType.COMMA, ',') diff --git a/riscemu/types.py b/riscemu/types.py new file mode 100644 index 0000000..49791b2 --- /dev/null +++ b/riscemu/types.py @@ -0,0 +1,72 @@ +from typing import List, Tuple +from .exceptions import MemoryAccessException +from .helpers import parse_numeric_argument +from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \ + T_AbsoluteAddress + + +class SimpleInstruction(Instruction): + def __init__(self, name: str, args: Tuple[str], context: InstructionContext, addr: T_RelativeAddress): + self.context = context + self.name = name + self.args = args + self.addr = addr + + def get_imm(self, num: int) -> int: + resolved_label = self.context.resolve_label(self.args[num], self.addr) + if resolved_label is None: + return parse_numeric_argument(self.args[num]) + return resolved_label + + def get_imm_reg(self, num: int) -> Tuple[int, str]: + return self.get_imm(num + 1), self.get_reg(num) + + def get_reg(self, num: int) -> str: + return self.args[num] + + +class InstructionMemorySection(MemorySection): + def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, base: int = 0): + self.name = name + self.base = base + self.context = context + self.size = len(instructions) * 4 + self.flags = MemoryFlags(True, True) + self.instructions = instructions + + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read') + + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write') + + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + if offset % 4 != 0: + raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch') + return self.instructions[offset // 4] + + +class BinaryDataMemorySection(MemorySection): + def __init__(self, data: bytearray, name: str, context: InstructionContext, base: int = 0): + self.name = name + self.base = base + self.context = context + self.size = len(data) + self.flags = MemoryFlags(False, False) + self.data = data + + def read(self, offset: T_RelativeAddress, size: int) -> bytearray: + if offset + size > self.size: + raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read') + return self.data[offset:offset + size] + + def write(self, offset: T_RelativeAddress, size: int, data: bytearray): + if offset + size > self.size: + raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write') + if len(data[0:size]) != size: + raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write') + self.data[offset:offset + size] = data[0:size] + + def read_ins(self, offset: T_RelativeAddress) -> Instruction: + raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self), + offset, 4, 'instruction fetch') diff --git a/test/test_tokenizer.py b/test/test_tokenizer.py index 659abf1..dc6c410 100644 --- a/test/test_tokenizer.py +++ b/test/test_tokenizer.py @@ -1,6 +1,6 @@ from unittest import TestCase -from riscemu.Tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA +from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA def ins(name: str) -> Token: