From 6e6ce90e9a5f4cdaf7c37344f04e4f9e4899ce02 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Thu, 22 Apr 2021 14:29:10 +0200 Subject: [PATCH] added lots more documentation and copyright notices --- riscemu/CPU.py | 2 +- riscemu/Config.py | 6 +++ riscemu/Exceptions.py | 6 +++ riscemu/Executable.py | 10 ++++ riscemu/ExecutableParser.py | 102 ++++++++++++++++++++++++++++++------ riscemu/MMU.py | 76 ++++++++++++++++++++++++--- riscemu/Registers.py | 53 +++++++++++++++++-- riscemu/Syscall.py | 37 +++++++++++-- riscemu/Tokenizer.py | 18 +++++-- riscemu/__init__.py | 10 ++++ riscemu/colors.py | 6 +++ riscemu/debug.py | 7 +++ riscemu/helpers.py | 10 ++-- 13 files changed, 305 insertions(+), 38 deletions(-) diff --git a/riscemu/CPU.py b/riscemu/CPU.py index b9b585e..afb2949 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -62,7 +62,7 @@ class CPU: # provide global syscall symbols if option is set if conf.include_scall_symbols: - self.mmu.global_symbols.update(self.syscall_int.get_syscall_symbols()) + self.mmu.global_symbols.update(get_syscall_symbols()) def get_tokenizer(self, tokenizer_input): """ diff --git a/riscemu/Config.py b/riscemu/Config.py index 646f058..6946de9 100644 --- a/riscemu/Config.py +++ b/riscemu/Config.py @@ -1,3 +1,9 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause +""" + from dataclasses import dataclass from typing import Optional diff --git a/riscemu/Exceptions.py b/riscemu/Exceptions.py index d1605f7..0c26d83 100644 --- a/riscemu/Exceptions.py +++ b/riscemu/Exceptions.py @@ -1,3 +1,9 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause +""" + from abc import abstractmethod from .colors import * diff --git a/riscemu/Executable.py b/riscemu/Executable.py index 4f7865f..4a9969a 100644 --- a/riscemu/Executable.py +++ b/riscemu/Executable.py @@ -1,3 +1,13 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause + +This file holds Executable and LoadedExecutable classes 
as well as loading and some linking code. + +FIXME: refactor this code into multiple files +""" + from dataclasses import dataclass, field from typing import Dict, List, Tuple, Union, Optional from .Exceptions import * diff --git a/riscemu/ExecutableParser.py b/riscemu/ExecutableParser.py index fc2f251..71433dc 100644 --- a/riscemu/ExecutableParser.py +++ b/riscemu/ExecutableParser.py @@ -1,3 +1,11 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause + +This file holds the parser that parses the tokenizer output. +""" + from .helpers import parse_numeric_argument, int_to_bytes from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags from .Exceptions import * @@ -8,6 +16,9 @@ from typing import Dict, Tuple, List, Optional class ExecutableParser: + """ + Parses output from the RiscVTokenizer + """ tokenizer: 'RiscVTokenizer' def __init__(self, tokenizer: 'RiscVTokenizer'): @@ -20,7 +31,12 @@ class ExecutableParser: self.stack_pref: Optional[int] = None self.globals: List[str] = list() - def parse(self): + def parse(self) -> Executable: + """ + parse tokenizer output into an executable + :return: the parsed executable + :raise ParseException: Raises a ParseException when invalid input is read + """ for token in self.tokenizer.tokens: if isinstance(token, RiscVInstructionToken): self.parse_instruction(token) @@ -28,9 +44,9 @@ class ExecutableParser: self.handle_symbol(token) elif isinstance(token, RiscVPseudoOpToken): self.handle_pseudo_op(token) - return self.get_execuable() + return self._get_execuable() - def get_execuable(self): + def _get_execuable(self) -> Executable: start_ptr = ('text', 0) if '_start' in self.symbols: start_ptr = self.symbols['_start'] @@ -38,24 +54,36 @@ class ExecutableParser: start_ptr = self.symbols['main'] return Executable(start_ptr, self.sections, self.symbols, self.stack_pref, self.globals, self.tokenizer.name) - def parse_instruction(self, ins: 'RiscVInstructionToken'): + def 
parse_instruction(self, ins: 'RiscVInstructionToken') -> None: + """ + parses an Instruction token + :param ins: the instruction token + """ if self.active_section is None: self.op_text() self.implicit_sections = True ASSERT_EQ(self.active_section, 'text') - sec = self.curr_sec() + sec = self._curr_sec() if isinstance(sec, InstructionMemorySection): sec.add_insn(ins) else: raise ParseException("SHOULD NOT BE REACHED") def handle_symbol(self, token: 'RiscVSymbolToken'): + """ + Handle a symbol token (such as 'main:') + :param token: the symbol token + """ ASSERT_NOT_IN(token.name, self.symbols) - sec_pos = self.curr_sec().size + sec_pos = self._curr_sec().size self.symbols[token.name] = (self.active_section, sec_pos) def handle_pseudo_op(self, op: 'RiscVPseudoOpToken'): + """ + Handle a pseudo op token (such as '.word 0xffaabbcc') + :param op: the pseudo-op token + """ name = 'op_' + op.name if hasattr(self, name): getattr(self, name)(op) @@ -64,68 +92,112 @@ class ExecutableParser: ## Pseudo op implementations: def op_section(self, op: 'RiscVPseudoOpToken'): + """ + handles a .section token + :param op: The token + """ ASSERT_LEN(op.args, 1) name = op.args[0][1:] ASSERT_IN(name, ('data', 'rodata', 'text')) getattr(self, 'op_' + name)(op) def op_text(self, op: 'RiscVPseudoOpToken' = None): - self.set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection) + """ + handles a .text token + :param op: The token + """ + self._set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection) def op_data(self, op: 'RiscVPseudoOpToken' = None): - self.set_sec('data', MemoryFlags(read_only=False, executable=False)) + """ + handles a .data token + :param op: The token + """ + self._set_sec('data', MemoryFlags(read_only=False, executable=False)) def op_rodata(self, op: 'RiscVPseudoOpToken' = None): - self.set_sec('rodata', MemoryFlags(read_only=True, executable=False)) + """ + handles a .rodata token + :param op: The 
token + """ + self._set_sec('rodata', MemoryFlags(read_only=True, executable=False)) def op_space(self, op: 'RiscVPseudoOpToken'): + """ + handles a .space token. Inserts empty space into the current (data or rodata) section + :param op: The token + """ ASSERT_IN(self.active_section, ('data', 'rodata')) ASSERT_LEN(op.args, 1) size = parse_numeric_argument(op.args[0]) - self.curr_sec().add(bytearray(size)) + self._curr_sec().add(bytearray(size)) def op_ascii(self, op: 'RiscVPseudoOpToken'): + """ + handles a .ascii token. Inserts ascii encoded text into the current data section + :param op: The token + """ ASSERT_IN(self.active_section, ('data', 'rodata')) ASSERT_LEN(op.args, 1) str = op.args[0][1:-1].encode('ascii').decode('unicode_escape') - self.curr_sec().add(bytearray(str, 'ascii')) + self._curr_sec().add(bytearray(str, 'ascii')) def op_asciiz(self, op: 'RiscVPseudoOpToken'): + """ + handles a .asciiz token. Inserts null-terminated ascii encoded text into the current data section + :param op: The token + """ ASSERT_IN(self.active_section, ('data', 'rodata')) ASSERT_LEN(op.args, 1) str = op.args[0][1:-1].encode('ascii').decode('unicode_escape') - self.curr_sec().add(bytearray(str + '\0', 'ascii')) + self._curr_sec().add(bytearray(str + '\0', 'ascii')) def op_stack(self, op: 'RiscVPseudoOpToken'): + """ + handles a .stack token. Sets the stack size preferences + :param op: The token + """ ASSERT_LEN(op.args, 1) size = parse_numeric_argument(op.args[0]) self.stack_pref = size def op_global(self, op: 'RiscVPseudoOpToken'): + """ + handles a .global token. Marks the token as global + :param op: The token + """ ASSERT_LEN(op.args, 1) name = op.args[0] self.globals.append(name) def op_set(self, op: 'RiscVPseudoOpToken'): + """ + handles a .set name, val token. 
Sets the symbol name to val + :param op: The token + """ ASSERT_LEN(op.args, 2) name = op.args[0] val = parse_numeric_argument(op.args[1]) self.symbols[name] = ('_static_', val) def op_align(self, op: 'RiscVPseudoOpToken'): + """ + handles an align token. Currently a nop (just not implemented fully yet, as linker handles most alignment tasks) + :param op: The token + """ pass def op_word(self, op: 'RiscVPseudoOpToken'): ASSERT_LEN(op.args, 1) val = parse_numeric_argument(op.args[0]) - self.curr_sec().add(int_to_bytes(val, 4)) + self._curr_sec().add(int_to_bytes(val, 4)) ## Section handler code - def set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection): + def _set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection): if name not in self.sections: self.sections[name] = cls(name, flags) self.active_section = name - def curr_sec(self): + def _curr_sec(self): return self.sections[self.active_section] diff --git a/riscemu/MMU.py b/riscemu/MMU.py index ec60e29..5f77b4e 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -1,5 +1,11 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause +""" + from .Config import RunConfig -from .Executable import Executable, LoadedExecutable, LoadedMemorySection +from .Executable import Executable, LoadedExecutable, LoadedMemorySection, LoadedInstruction from .helpers import align_addr from .Exceptions import OutOfMemoryException from .colors import * @@ -7,13 +13,33 @@ from typing import Dict, List, Tuple, Optional class MMU: + """ + The MemoryManagementUnit (handles loading binaries, and reading/writing data) + """ + + """ + The maximum size of the memory in bytes + """ max_size = 0xFFFFFFFF - # make each block accessible by it's base addr + + """ + A list of all loaded memory sections + """ sections: List[LoadedMemorySection] + """ + A list of all loaded executables + """ binaries: List[LoadedExecutable] + + """ + The last loaded executable (the next executable is inserted directly after this 
one) + """ last_bin: Optional[LoadedExecutable] = None + """ + The global symbol table + """ global_symbols: Dict[str, int] def __init__(self, conf: RunConfig): @@ -23,7 +49,13 @@ class MMU: self.conf = conf self.global_symbols = dict() - def load_bin(self, bin: Executable): + def load_bin(self, bin: Executable) -> LoadedExecutable: + """ + Load an executable into memory + :param bin: the executable to load + :return: A LoadedExecutable + :raises OutOfMemoryException: When all memory is used + """ if self.last_bin is None: addr = 0x100 # start at 0x100 instead of 0x00 else: @@ -54,31 +86,63 @@ class MMU: return loaded_bin def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]: + """ + Returns the section that contains the address addr + :param addr: the Address to look for + :return: The LoadedMemorySection or None + """ for sec in self.sections: if sec.base <= addr < sec.base + sec.size: return sec return None - def read_ins(self, addr: int): + def read_ins(self, addr: int) -> LoadedInstruction: + """ + Read a single instruction located at addr + :param addr: The location + :return: The Instruction + """ sec = self.get_sec_containing(addr) return sec.read_instruction(addr - sec.base) - def read(self, addr: int, size: int): + def read(self, addr: int, size: int) -> bytearray: + """ + Read size bytes of memory at addr + :param addr: The address at which to start reading + :param size: The number of bytes to read + :return: The bytearray at addr + """ sec = self.get_sec_containing(addr) return sec.read(addr - sec.base, size) def write(self, addr: int, size: int, data): + """ + Write bytes into memory + :param addr: The address at which to write + :param size: The number of bytes to write + :param data: The bytearray to write (only first size bytes are written) + """ sec = self.get_sec_containing(addr) return sec.write(addr - sec.base, size, data) - # debugging interactions: def dump(self, addr, *args, **kwargs): + """ + Dump the memory contents + + 
:param addr: The address at which to dump + :param args: args for the dump function of the loaded memory section + :param kwargs: kwargs for the dump function of the loaded memory section + """ sec = self.get_sec_containing(addr) if sec is None: return sec.dump(addr, *args, **kwargs) def symbol(self, symb:str): + """ + Look up the symbol symb in all local symbol tables (and the global one) + :param symb: The symbol name to look up + """ print(FMT_MEM + "[MMU] Lookup for symbol {}:".format(symb) + FMT_NONE) if symb in self.global_symbols: print(" Found global symbol {}: 0x{:X}".format(symb, self.global_symbols[symb])) diff --git a/riscemu/Registers.py b/riscemu/Registers.py index d45b076..d45073b 100644 --- a/riscemu/Registers.py +++ b/riscemu/Registers.py @@ -1,17 +1,35 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause +""" + from .Config import RunConfig from .helpers import * from collections import defaultdict from .Exceptions import InvalidRegisterException class Registers: + """ + Represents a bunch of registers + """ + def __init__(self, conf: RunConfig): + """ + Initialize the register configuration, respecting the RunConfig conf + :param conf: The RunConfig + """ self.vals = defaultdict(lambda: 0) self.last_set = None self.last_read = None self.conf = conf def dump(self, full=False): - named_regs = [self.reg_repr(reg) for reg in Registers.named_registers()] + """ + Dump all registers to stdout + :param full: If True, floating point registers are dumped too + """ + named_regs = [self._reg_repr(reg) for reg in Registers.named_registers()] lines = [[] for i in range(12)] if not full: @@ -31,7 +49,7 @@ class Registers: lines[i].append(" " * 15) else: reg = '{}{}'.format(name, i) - lines[i].append(self.reg_repr(reg)) + lines[i].append(self._reg_repr(reg)) print("Registers[{},{}](".format( FMT_ORANGE + FMT_UNDERLINE + 'read' + FMT_NONE, @@ -49,9 +67,12 @@ class Registers: print(")") def dump_reg_a(self): - print("Registers[a]:" + " 
".join(self.reg_repr('a{}'.format(i)) for i in range(8))) + """ + Dump the a registers + """ + print("Registers[a]:" + " ".join(self._reg_repr('a{}'.format(i)) for i in range(8))) - def reg_repr(self, reg): + def _reg_repr(self, reg): txt = '{:4}=0x{:08X}'.format(reg, self.get(reg, False)) if reg == 'fp': reg = 's0' @@ -65,7 +86,14 @@ class Registers: return FMT_GRAY + txt + FMT_NONE return txt - def set(self, reg, val, mark_set=True): + def set(self, reg, val, mark_set=True) -> bool: + """ + Set a register content to val + :param reg: The register to set + :param val: The new value + :param mark_set: If True, marks this register as "last accessed" (only used internally) + :return: If the operation was successful + """ if reg == 'zero': print("[Registers.set] trying to set read-only register: {}".format(reg)) return False @@ -77,8 +105,15 @@ class Registers: if mark_set: self.last_set = reg self.vals[reg] = val + return True def get(self, reg, mark_read=True): + """ + Returns the contents of register reg + :param reg: The register name + :param mark_read: If the register should be marked as "last read" (only used internally) + :return: The contents of register reg + """ if reg not in Registers.all_registers(): raise InvalidRegisterException(reg) if reg == 'fp': @@ -89,6 +124,10 @@ class Registers: @staticmethod def all_registers(): + """ + Return a list of all valid registers + :return: The list + """ return ['zero', 'ra', 'sp', 'gp', 'tp', 's0', 'fp', 't0', 't1', 't2', 't3', 't4', 't5', 't6', 's1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', @@ -99,4 +138,8 @@ class Registers: @staticmethod def named_registers(): + """ + Return all named registers + :return: The list + """ return ['zero', 'ra', 'sp', 'gp', 'tp', 'fp'] diff --git a/riscemu/Syscall.py b/riscemu/Syscall.py index ec9fe51..7fae1f7 100644 --- a/riscemu/Syscall.py +++ b/riscemu/Syscall.py @@ -1,3 +1,9 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause +""" + 
from dataclasses import dataclass from typing import Dict, IO import sys @@ -11,6 +17,9 @@ import typing if typing.TYPE_CHECKING: from . import CPU +""" +All available syscalls (mapped id->name) +""" SYSCALLS = { 63: 'read', 64: 'write', @@ -19,6 +28,9 @@ SYSCALLS = { 1025: 'close', } +""" +All available file open modes +""" OPEN_MODES = { 0: 'rb', 1: 'wb', @@ -30,6 +42,9 @@ OPEN_MODES = { @dataclass(frozen=True) class Syscall: + """ + Represents a syscall + """ id: int registers: Registers cpu: 'CPU' @@ -46,7 +61,21 @@ class Syscall: def ret(self, code): self.registers.set('a0', code) + +def get_syscall_symbols(): + """ + Returns a dictionary of all syscall symbols (SCALL_NAME -> id) + :return: + """ + return { + ('SCALL_' + name.upper()): num for num, name in SYSCALLS.items() + } + + class SyscallInterface: + """ + Handles syscalls + """ open_files: Dict[int, IO] next_open_handle: int @@ -163,14 +192,12 @@ class SyscallInterface: return scall.ret(0) def exit(self, scall: Syscall): + """ + Exit syscall. 
Exits the system with status code a0 + """ scall.cpu.exit = True scall.cpu.exit_code = scall.registers.get('a0') - def get_syscall_symbols(self): - return { - ('SCALL_' + name.upper()): num for num, name in SYSCALLS.items() - } - def __repr__(self): return "{}(\n\tfiles={}\n)".format( self.__class__.__name__, diff --git a/riscemu/Tokenizer.py b/riscemu/Tokenizer.py index 434309f..4911700 100644 --- a/riscemu/Tokenizer.py +++ b/riscemu/Tokenizer.py @@ -1,3 +1,9 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause +""" + import re from enum import IntEnum from typing import List @@ -75,6 +81,9 @@ def split_accepting_quotes(string, at=REG_ARG_SPLIT, quotes=('"', "'")): class RiscVInput: + """ + Represents an Assembly file + """ def __init__(self, content: str, name: str): self.content = content self.pos = 0 @@ -240,10 +249,13 @@ class RiscVPseudoOpToken(RiscVToken): class RiscVTokenizer: - def __init__(self, input: RiscVInput, instructions: List[str]): - self.input = input + """ + A tokenizer for the RISC-V syntax of a given CPU + """ + def __init__(self, input_assembly: RiscVInput, instructions: List[str]): + self.input = input_assembly self.tokens: List[RiscVToken] = [] - self.name = input.name + self.name = input_assembly.name self.instructions = instructions def tokenize(self): diff --git a/riscemu/__init__.py b/riscemu/__init__.py index e80c898..3555f9c 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -1,3 +1,13 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause + +This package aims at providing an all-round usable RISC-V emulator and debugger + +It contains everything needed to run assembly files, so you don't need any custom compilers or toolchains +""" + from .Exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException diff --git 
a/riscemu/colors.py b/riscemu/colors.py index 6838a81..a08ebd1 100644 --- a/riscemu/colors.py +++ b/riscemu/colors.py @@ -1,3 +1,9 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause +""" + # Colors FMT_RED = '\033[31m' diff --git a/riscemu/debug.py b/riscemu/debug.py index 5cae80c..f8fe134 100644 --- a/riscemu/debug.py +++ b/riscemu/debug.py @@ -1,3 +1,10 @@ +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause +""" + + import typing from .Registers import Registers from .colors import FMT_DEBUG, FMT_NONE diff --git a/riscemu/helpers.py b/riscemu/helpers.py index 922f860..dc75461 100644 --- a/riscemu/helpers.py +++ b/riscemu/helpers.py @@ -1,6 +1,10 @@ -from math import log10, ceil, log -from .Exceptions import NumberFormatException -from .colors import * +""" +RiscEmu (c) 2021 Anton Lydike + +SPDX-License-Identifier: BSD-2-Clause +""" + +from math import log10, ceil from .Exceptions import *