added lots more documentation and copyright notices

This commit is contained in:
Anton Lydike 2021-04-22 14:29:10 +02:00
parent 2a68f16e99
commit 6e6ce90e9a
13 changed files with 305 additions and 38 deletions

View File

@ -62,7 +62,7 @@ class CPU:
# provide global syscall symbols if option is set
if conf.include_scall_symbols:
self.mmu.global_symbols.update(self.syscall_int.get_syscall_symbols())
self.mmu.global_symbols.update(get_syscall_symbols())
def get_tokenizer(self, tokenizer_input):
"""

View File

@ -1,3 +1,9 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
"""
from dataclasses import dataclass
from typing import Optional

View File

@ -1,3 +1,9 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
"""
from abc import abstractmethod
from .colors import *

View File

@ -1,3 +1,13 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
This file holds Executable and LoadedExecutable classes as well as loading and some linking code.
FIXME: refactor this code into multiple files
"""
from dataclasses import dataclass, field
from typing import Dict, List, Tuple, Union, Optional
from .Exceptions import *

View File

@ -1,3 +1,11 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
This file holds the parser that parses the tokenizer output.
"""
from .helpers import parse_numeric_argument, int_to_bytes
from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags
from .Exceptions import *
@ -8,6 +16,9 @@ from typing import Dict, Tuple, List, Optional
class ExecutableParser:
"""
Parses output from the RiscVTokenizer
"""
tokenizer: 'RiscVTokenizer'
def __init__(self, tokenizer: 'RiscVTokenizer'):
@ -20,7 +31,12 @@ class ExecutableParser:
self.stack_pref: Optional[int] = None
self.globals: List[str] = list()
def parse(self):
def parse(self) -> Executable:
"""
parse tokenizer output into an executable
:return: the parsed executable
:raise ParseException: Raises a ParseException when invalid input is read
"""
for token in self.tokenizer.tokens:
if isinstance(token, RiscVInstructionToken):
self.parse_instruction(token)
@ -28,9 +44,9 @@ class ExecutableParser:
self.handle_symbol(token)
elif isinstance(token, RiscVPseudoOpToken):
self.handle_pseudo_op(token)
return self.get_execuable()
return self._get_execuable()
def get_execuable(self):
def _get_execuable(self) -> Executable:
start_ptr = ('text', 0)
if '_start' in self.symbols:
start_ptr = self.symbols['_start']
@ -38,24 +54,36 @@ class ExecutableParser:
start_ptr = self.symbols['main']
return Executable(start_ptr, self.sections, self.symbols, self.stack_pref, self.globals, self.tokenizer.name)
def parse_instruction(self, ins: 'RiscVInstructionToken') -> None:
    """
    Append an instruction token to the current (text) section.

    When no section is active yet, the text section is opened through
    op_text() and implicit_sections is set to True.

    :param ins: the instruction token
    :raise ParseException: if the current section is not an InstructionMemorySection
    """
    if self.active_section is None:
        # no section directive was seen so far, fall back to the text section
        self.op_text()
        self.implicit_sections = True

    ASSERT_EQ(self.active_section, 'text')
    section = self._curr_sec()
    if not isinstance(section, InstructionMemorySection):
        raise ParseException("SHOULD NOT BE REACHED")
    section.add_insn(ins)
def handle_symbol(self, token: 'RiscVSymbolToken'):
    """
    Register a symbol token (such as 'main:') in the symbol table.

    The symbol is stored as (active section name, current size of that section).

    :param token: the symbol token
    """
    ASSERT_NOT_IN(token.name, self.symbols)
    offset = self._curr_sec().size
    location = (self.active_section, offset)
    self.symbols[token.name] = location
def handle_pseudo_op(self, op: 'RiscVPseudoOpToken'):
"""
Handle a pseudo op token (such as '.word 0xffaabbcc')
:param op: the pseudo-op token
"""
name = 'op_' + op.name
if hasattr(self, name):
getattr(self, name)(op)
@ -64,68 +92,112 @@ class ExecutableParser:
## Pseudo op implementations:
def op_section(self, op: 'RiscVPseudoOpToken'):
    """
    Handle a .section token by forwarding to the handler of the named section.

    The first character of the single argument is dropped; the remainder is
    checked with ASSERT_IN against 'data', 'rodata' and 'text' and then used
    to look up the matching op_<name> method.

    :param op: The token
    """
    ASSERT_LEN(op.args, 1)
    section_name = op.args[0][1:]
    ASSERT_IN(section_name, ('data', 'rodata', 'text'))
    handler = getattr(self, 'op_' + section_name)
    handler(op)
def op_text(self, op: 'RiscVPseudoOpToken' = None):
    """
    Handle a .text token: make 'text' the active section.

    :param op: The token (not used by this handler)
    """
    text_flags = MemoryFlags(read_only=True, executable=True)
    self._set_sec('text', text_flags, cls=InstructionMemorySection)
def op_data(self, op: 'RiscVPseudoOpToken' = None):
    """
    Handle a .data token: make 'data' the active section.

    :param op: The token (not used by this handler)
    """
    data_flags = MemoryFlags(read_only=False, executable=False)
    self._set_sec('data', data_flags)
def op_rodata(self, op: 'RiscVPseudoOpToken' = None):
    """
    Handle a .rodata token: make 'rodata' the active section.

    :param op: The token (not used by this handler)
    """
    rodata_flags = MemoryFlags(read_only=True, executable=False)
    self._set_sec('rodata', rodata_flags)
def op_space(self, op: 'RiscVPseudoOpToken'):
    """
    Handle a .space token: append a zero-filled bytearray to the current
    (data or rodata) section.

    :param op: The token; its single argument is parsed as the number of bytes
    """
    ASSERT_IN(self.active_section, ('data', 'rodata'))
    ASSERT_LEN(op.args, 1)
    num_bytes = parse_numeric_argument(op.args[0])
    section = self._curr_sec()
    section.add(bytearray(num_bytes))
def op_ascii(self, op: 'RiscVPseudoOpToken'):
    """
    Handle a .ascii token: append ascii encoded text to the current
    (data or rodata) section.

    The first and last character of the argument are dropped (presumably the
    surrounding quotes) and escape sequences are expanded with the
    'unicode_escape' codec before the text is encoded as ascii.

    :param op: The token
    """
    ASSERT_IN(self.active_section, ('data', 'rodata'))
    ASSERT_LEN(op.args, 1)
    # named `text` rather than `str` so the builtin is not shadowed
    text = op.args[0][1:-1].encode('ascii').decode('unicode_escape')
    self._curr_sec().add(bytearray(text, 'ascii'))
def op_asciiz(self, op: 'RiscVPseudoOpToken'):
    """
    Handle a .asciiz token: append null-terminated ascii encoded text to the
    current (data or rodata) section.

    The first and last character of the argument are dropped (presumably the
    surrounding quotes) and escape sequences are expanded with the
    'unicode_escape' codec; a trailing '\\0' is added before encoding as ascii.

    :param op: The token
    """
    ASSERT_IN(self.active_section, ('data', 'rodata'))
    ASSERT_LEN(op.args, 1)
    # named `text` rather than `str` so the builtin is not shadowed
    text = op.args[0][1:-1].encode('ascii').decode('unicode_escape')
    self._curr_sec().add(bytearray(text + '\0', 'ascii'))
def op_stack(self, op: 'RiscVPseudoOpToken'):
    """
    Handle a .stack token: store the parsed argument as the stack size preference.

    :param op: The token
    """
    ASSERT_LEN(op.args, 1)
    self.stack_pref = parse_numeric_argument(op.args[0])
def op_global(self, op: 'RiscVPseudoOpToken'):
    """
    Handle a .global token: add the named symbol to the list of globals.

    :param op: The token
    """
    ASSERT_LEN(op.args, 1)
    self.globals.append(op.args[0])
def op_set(self, op: 'RiscVPseudoOpToken'):
    """
    Handle a ".set name, val" token: bind the symbol name to the parsed value.

    The symbol is stored under the pseudo section name '_static_'.

    :param op: The token
    """
    ASSERT_LEN(op.args, 2)
    symbol_name = op.args[0]
    value = parse_numeric_argument(op.args[1])
    entry = ('_static_', value)
    self.symbols[symbol_name] = entry
def op_align(self, op: 'RiscVPseudoOpToken'):
    """
    Handle an align token. Currently a no-op (not fully implemented yet, as the
    linker handles most alignment tasks).

    :param op: The token
    """
def op_word(self, op: 'RiscVPseudoOpToken'):
    """
    Handle a .word token (such as '.word 0xffaabbcc'): append the parsed
    argument, converted with int_to_bytes(val, 4), to the current section.

    :param op: The token
    """
    ASSERT_LEN(op.args, 1)
    word = parse_numeric_argument(op.args[0])
    section = self._curr_sec()
    section.add(int_to_bytes(word, 4))
## Section handler code
def _set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection):
    """
    Make the section called name the active one, creating it on first use.

    :param name: the section name
    :param flags: flags handed to the section constructor when it is created
    :param cls: the class used to create a section that does not exist yet
    """
    already_known = name in self.sections
    if not already_known:
        # only build a new section object the first time the name is seen
        self.sections[name] = cls(name, flags)
    self.active_section = name
def _curr_sec(self):
    """
    Return the section object registered under the active section name.

    :return: the entry of self.sections for self.active_section
    """
    known_sections = self.sections
    return known_sections[self.active_section]

View File

@ -1,5 +1,11 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
"""
from .Config import RunConfig
from .Executable import Executable, LoadedExecutable, LoadedMemorySection
from .Executable import Executable, LoadedExecutable, LoadedMemorySection, LoadedInstruction
from .helpers import align_addr
from .Exceptions import OutOfMemoryException
from .colors import *
@ -7,13 +13,33 @@ from typing import Dict, List, Tuple, Optional
class MMU:
"""
The MemoryManagementUnit (handles loading binaries, and reading/writing data)
"""
"""
The maximum size of the memory in bytes
"""
max_size = 0xFFFFFFFF
# make each block accessible by its base addr
"""
A list of all loaded memory sections
"""
sections: List[LoadedMemorySection]
"""
A list of all loaded executables
"""
binaries: List[LoadedExecutable]
"""
The last loaded executable (the next executable is inserted directly after this one)
"""
last_bin: Optional[LoadedExecutable] = None
"""
The global symbol table
"""
global_symbols: Dict[str, int]
def __init__(self, conf: RunConfig):
@ -23,7 +49,13 @@ class MMU:
self.conf = conf
self.global_symbols = dict()
def load_bin(self, bin: Executable):
def load_bin(self, bin: Executable) -> LoadedExecutable:
"""
Load an executable into memory
:param bin: the executable to load
:return: A LoadedExecutable
:raises OutOfMemoryException: When all memory is used
"""
if self.last_bin is None:
addr = 0x100 # start at 0x100 instead of 0x00
else:
@ -54,31 +86,63 @@ class MMU:
return loaded_bin
def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]:
    """
    Find the first loaded section whose range [base, base + size) contains addr.

    :param addr: the address to look for
    :return: The matching LoadedMemorySection, or None if no section contains addr
    """
    matching = (
        section
        for section in self.sections
        if section.base <= addr < section.base + section.size
    )
    return next(matching, None)
def read_ins(self, addr: int) -> LoadedInstruction:
    """
    Read a single instruction located at addr.

    The address is translated into an offset relative to the base of the
    section that contains it.
    NOTE(review): get_sec_containing returns None when no section contains
    addr; that case is not handled here.

    :param addr: The location
    :return: The instruction returned by the section's read_instruction
    """
    section = self.get_sec_containing(addr)
    fetch = section.read_instruction
    return fetch(addr - section.base)
def read(self, addr: int, size: int) -> bytearray:
    """
    Read size bytes of memory at addr.

    The address is translated into an offset relative to the base of the
    section that contains it.
    NOTE(review): get_sec_containing returns None when no section contains
    addr; that case is not handled here.

    :param addr: The address at which to start reading
    :param size: The number of bytes to read
    :return: The result of the section's read for that offset and size
    """
    section = self.get_sec_containing(addr)
    reader = section.read
    return reader(addr - section.base, size)
def write(self, addr: int, size: int, data):
    """
    Write bytes into memory.

    The address is translated into an offset relative to the base of the
    section that contains it; size and data are passed through unchanged.
    NOTE(review): get_sec_containing returns None when no section contains
    addr; that case is not handled here.

    :param addr: The address at which to write
    :param size: The number of bytes to write
    :param data: The data to write
    :return: whatever the section's write returns
    """
    section = self.get_sec_containing(addr)
    writer = section.write
    return writer(addr - section.base, size, data)
# debugging interactions:
def dump(self, addr, *args, **kwargs):
    """
    Dump the memory contents around addr.

    Does nothing when no loaded section contains addr.

    :param addr: The address at which to dump
    :param args: args for the dump function of the loaded memory section
    :param kwargs: kwargs for the dump function of the loaded memory section
    """
    section = self.get_sec_containing(addr)
    if section is not None:
        section.dump(addr, *args, **kwargs)
def symbol(self, symb:str):
"""
Look up the symbol symb in all local symbol tables (and the global one)
:param symb: The symbol name to look up
"""
print(FMT_MEM + "[MMU] Lookup for symbol {}:".format(symb) + FMT_NONE)
if symb in self.global_symbols:
print(" Found global symbol {}: 0x{:X}".format(symb, self.global_symbols[symb]))

View File

@ -1,17 +1,35 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
"""
from .Config import RunConfig
from .helpers import *
from collections import defaultdict
from .Exceptions import InvalidRegisterException
class Registers:
"""
Represents a bunch of registers
"""
def __init__(self, conf: RunConfig):
    """
    Set up an empty register file.

    Register values default to 0 on first access; no register is marked as
    last read or last set yet.

    :param conf: The RunConfig, stored on the instance
    """
    self.conf = conf
    self.last_read = None
    self.last_set = None
    self.vals = defaultdict(lambda: 0)
def dump(self, full=False):
named_regs = [self.reg_repr(reg) for reg in Registers.named_registers()]
"""
Dump all registers to stdout
:param full: If True, floating point registers are dumped too
"""
named_regs = [self._reg_repr(reg) for reg in Registers.named_registers()]
lines = [[] for i in range(12)]
if not full:
@ -31,7 +49,7 @@ class Registers:
lines[i].append(" " * 15)
else:
reg = '{}{}'.format(name, i)
lines[i].append(self.reg_repr(reg))
lines[i].append(self._reg_repr(reg))
print("Registers[{},{}](".format(
FMT_ORANGE + FMT_UNDERLINE + 'read' + FMT_NONE,
@ -49,9 +67,12 @@ class Registers:
print(")")
def dump_reg_a(self):
    """
    Print the registers a0 to a7 on a single line.
    """
    formatted = [self._reg_repr('a{}'.format(index)) for index in range(8)]
    print("Registers[a]:" + " ".join(formatted))
def reg_repr(self, reg):
def _reg_repr(self, reg):
txt = '{:4}=0x{:08X}'.format(reg, self.get(reg, False))
if reg == 'fp':
reg = 's0'
@ -65,7 +86,14 @@ class Registers:
return FMT_GRAY + txt + FMT_NONE
return txt
def set(self, reg, val, mark_set=True):
def set(self, reg, val, mark_set=True) -> bool:
"""
Set a register content to val
:param reg: The register to set
:param val: The new value
:param mark_set: If True, marks this register as "last accessed" (only used internally)
:return: If the operation was successful
"""
if reg == 'zero':
print("[Registers.set] trying to set read-only register: {}".format(reg))
return False
@ -77,8 +105,15 @@ class Registers:
if mark_set:
self.last_set = reg
self.vals[reg] = val
return True
def get(self, reg, mark_read=True):
"""
Returns the contents of register reg
:param reg: The register name
:param mark_read: If the register should be marked as "last read" (only used internally)
:return: The contents of register reg
"""
if reg not in Registers.all_registers():
raise InvalidRegisterException(reg)
if reg == 'fp':
@ -89,6 +124,10 @@ class Registers:
@staticmethod
def all_registers():
"""
Return a list of all valid registers
:return: The list
"""
return ['zero', 'ra', 'sp', 'gp', 'tp', 's0', 'fp',
't0', 't1', 't2', 't3', 't4', 't5', 't6',
's1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11',
@ -99,4 +138,8 @@ class Registers:
@staticmethod
def named_registers():
    """
    Return the names of all named registers.

    :return: A new list with the names zero, ra, sp, gp, tp and fp
    """
    names = ['zero', 'ra', 'sp', 'gp', 'tp', 'fp']
    return names

View File

@ -1,3 +1,9 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
"""
from dataclasses import dataclass
from typing import Dict, IO
import sys
@ -11,6 +17,9 @@ import typing
if typing.TYPE_CHECKING:
from . import CPU
"""
All available syscalls (mapped id->name)
"""
SYSCALLS = {
63: 'read',
64: 'write',
@ -19,6 +28,9 @@ SYSCALLS = {
1025: 'close',
}
"""
All available file open modes
"""
OPEN_MODES = {
0: 'rb',
1: 'wb',
@ -30,6 +42,9 @@ OPEN_MODES = {
@dataclass(frozen=True)
class Syscall:
"""
Represents a syscall
"""
id: int
registers: Registers
cpu: 'CPU'
@ -46,7 +61,21 @@ class Syscall:
def ret(self, code):
    """
    Store the result of the syscall in register a0.

    :param code: the value written to a0
    """
    target_registers = self.registers
    target_registers.set('a0', code)
def get_syscall_symbols():
    """
    Returns a dictionary of all syscall symbols (SCALL_<NAME> -> id).

    :return: a dict mapping 'SCALL_' plus the upper-cased syscall name to its id,
             built from SYSCALLS
    """
    symbols = {}
    for num, name in SYSCALLS.items():
        symbols['SCALL_' + name.upper()] = num
    return symbols
class SyscallInterface:
"""
Handles syscalls
"""
open_files: Dict[int, IO]
next_open_handle: int
@ -163,14 +192,12 @@ class SyscallInterface:
return scall.ret(0)
def exit(self, scall: Syscall):
    """
    Exit syscall. Flags the CPU as exited and uses the content of a0 as exit code.

    :param scall: the syscall being handled
    """
    cpu = scall.cpu
    cpu.exit = True
    cpu.exit_code = scall.registers.get('a0')
def get_syscall_symbols(self):
return {
('SCALL_' + name.upper()): num for num, name in SYSCALLS.items()
}
def __repr__(self):
return "{}(\n\tfiles={}\n)".format(
self.__class__.__name__,

View File

@ -1,3 +1,9 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
"""
import re
from enum import IntEnum
from typing import List
@ -75,6 +81,9 @@ def split_accepting_quotes(string, at=REG_ARG_SPLIT, quotes=('"', "'")):
class RiscVInput:
"""
Represents an Assembly file
"""
def __init__(self, content: str, name: str):
self.content = content
self.pos = 0
@ -240,10 +249,13 @@ class RiscVPseudoOpToken(RiscVToken):
class RiscVTokenizer:
def __init__(self, input: RiscVInput, instructions: List[str]):
self.input = input
"""
A tokenizer for the RISC-V syntax of a given CPU
"""
def __init__(self, input_assembly: RiscVInput, instructions: List[str]):
    """
    Create a tokenizer for one assembly input.

    :param input_assembly: the input to tokenize; its name is copied to self.name
    :param instructions: the list of instruction names, stored on the instance
    """
    self.input = input_assembly
    self.name = input_assembly.name
    self.instructions = instructions
    # starts empty; nothing is tokenized in the constructor
    self.tokens: List[RiscVToken] = []
def tokenize(self):

View File

@ -1,3 +1,13 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
This package aims at providing an all-round usable RISC-V emulator and debugger
It contains everything needed to run assembly files, so you don't need any custom compilers or toolchains
"""
from .Exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \
ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException

View File

@ -1,3 +1,9 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
"""
# Colors
FMT_RED = '\033[31m'

View File

@ -1,3 +1,10 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
"""
import typing
from .Registers import Registers
from .colors import FMT_DEBUG, FMT_NONE

View File

@ -1,6 +1,10 @@
from math import log10, ceil, log
from .Exceptions import NumberFormatException
from .colors import *
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: BSD-2-Clause
"""
from math import log10, ceil
from .Exceptions import *