Merge pull request #5 from AntonLydike/assembly-parser-rework

- Completely revamped parsing of readable assembly - Completely revamped internal types - Added strict and correct 32-bit integer handling in registers
3 years ago · 4b77ce05a7
parent 52e189c226 fe4b3efb6f
commit 4b77ce05a7
64 changed files with 2853 additions and 1899 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,3 +3,4 @@ __pycache__
 .mypy_cache
 dist/
 riscemu.egg-info
+build/
--- a/.idea/riscemu.iml
+++ b/.idea/riscemu.iml
@ -2,7 +2,11 @@
 <module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/test" isTestSource="true" />
      <excludeFolder url="file://$MODULE_DIR$/venv" />
+      <excludeFolder url="file://$MODULE_DIR$/dist" />
+      <excludeFolder url="file://$MODULE_DIR$/.mypy_cache" />
+      <excludeFolder url="file://$MODULE_DIR$/riscemu.egg-info" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@ -12,7 +12,7 @@ sphinx:

 # Optionally set the version of Python and requirements required to build your docs
 python:
-   version: 3.7
+   version: "3.7"
   system_packages: true
   install:
      - requirements: sphinx-docs/requirements.txt
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2021 Anton Lydike
+Copyright (c) 2021-2022 Anton Lydike

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/README.md
+++ b/README.md
@ -8,7 +8,7 @@ or [riscemu.datenvorr.at](https://riscemu.datenvorr.at/index.html).
 This emulator contains:
 * RISC-V Assembly parser
 * RISC-V Assembly loader
-* Emulation for parts of the basic RISC-V instruction set
+* Emulation for most parts of the basic RISC-V instruction set and the M and A extensions
 * Naive memory emulator
 * Basic implementation of some syscalls 
 * A debugging environment
@ -97,7 +97,6 @@ generate and make all doc files for you. Finally, you can open the docs locall b
  * RISC-V reference card: https://www.cl.cam.ac.uk/teaching/1617/ECAD+Arch/files/docs/RISCVGreenCardv8-20151013.pdf
  
 ## TODO:
- * Currently registers don't enforce 32 bit (no overflows etc)
 * Correctly handle 12 and 20 bit immediate (currently not limited to bits at all)
 * Add a cycle limit to the options and CPU to catch infinite loops
 * Move away from `print` and use `logging.logger` instead
--- a/generate-docs.sh
+++ b/generate-docs.sh
@ -6,7 +6,7 @@ echo "Generating docs!"

 if ! command -v 'sphinx-apidoc'; then
  source venv/bin/activate
-  pip install sphinx
+  pip install -r sphinx-docs/requirements.txt
 fi


--- a/riscemu/CPU.py
+++ b/riscemu/CPU.py
@ -1,189 +1,114 @@
 """
-RiscEmu (c) 2021 Anton Lydike
+RiscEmu (c) 2021-2022 Anton Lydike

 SPDX-License-Identifier: MIT

-This file contains the CPU logic (not the individual instruction sets). See instructions/InstructionSet.py for more info
+This file contains the CPU logic (not the individual instruction sets). See instructions/instruction_set.py for more info
 on them.
 """
-import sys
-from typing import Tuple, List, Dict, Callable, Type
+import typing
+from typing import List, Type

-from .Tokenizer import RiscVTokenizer
-from .Executable import MemoryFlags
-from .Syscall import SyscallInterface, get_syscall_symbols
-from .Exceptions import RiscemuBaseException, LaunchDebuggerException
+import riscemu
+from .config import RunConfig
 from .MMU import MMU
-from .Config import RunConfig
-from .Registers import Registers
+from .colors import FMT_CPU, FMT_NONE
 from .debug import launch_debug_session
-from .colors import FMT_CPU, FMT_NONE, FMT_ERROR
-
-import riscemu
-
-import typing
+from .types.exceptions import RiscemuBaseException, LaunchDebuggerException
+from .syscall import SyscallInterface, get_syscall_symbols
+from .types import CPU, ProgramLoader, Int32, BinaryDataMemorySection
+from .parser import AssemblyFileLoader

 if typing.TYPE_CHECKING:
-    from . import Executable, LoadedExecutable, LoadedInstruction
-    from .instructions.InstructionSet import InstructionSet
+    from .instructions.instruction_set import InstructionSet


-class CPU:
+class UserModeCPU(CPU):
    """
    This class represents a single CPU. It holds references to it's mmu, registers and syscall interrupt handler.

    It is initialized with a configuration and a list of instruction sets.
    """

-    INS_XLEN = 1
-
-    def __init__(self, conf: RunConfig, instruction_sets: List[Type['riscemu.InstructionSet']]):
+    def __init__(self, instruction_sets: List[Type['riscemu.InstructionSet']], conf: RunConfig):
        """
        Creates a CPU instance.

-        :param conf: An instance of the current RunConfiguration
        :param instruction_sets: A list of instruction set classes. These must inherit from the InstructionSet class
        """
        # setup CPU states
-        self.pc = 0
-        self.cycle = 0
-        self.exit: bool = False
-        self.exit_code: int = 0
-        self.conf = conf
-        self.active_debug = False  # if a debugging session is currently runnign
-
-        self.stack: typing.Optional['riscemu.LoadedMemorySection'] = None
-
-        # setup MMU, registers and syscall handlers
-        self.mmu = MMU(conf)
-        self.regs = Registers(conf)
-        self.syscall_int = SyscallInterface()
-
-        # load all instruction sets
-        self.instruction_sets: List[riscemu.InstructionSet] = list()
-        self.instructions: Dict[str, Callable[[LoadedInstruction], None]] = dict()
-        for set_class in instruction_sets:
-            ins_set = set_class(self)
-            self.instructions.update(ins_set.load())
-            self.instruction_sets.append(ins_set)
-
-        # provide global syscall symbols if option is set
-        if conf.include_scall_symbols:
-            self.mmu.global_symbols.update(get_syscall_symbols())
-
-    def get_tokenizer(self, tokenizer_input):
-        """
-        Returns a tokenizer that respects the language of the CPU
-
-        :param tokenizer_input: an instance of the RiscVTokenizerInput class
-        """
-        return RiscVTokenizer(tokenizer_input, self.all_instructions())
-
-    def load(self, e: riscemu.Executable):
-        """
-        Load an executable into Memory
-        """
-        return self.mmu.load_bin(e)
-
-    def run_loaded(self, le: 'riscemu.LoadedExecutable'):
-        """
-        Run a loaded executable
-        """
-        self.pc = le.run_ptr
-
-        if self.conf.stack_size > 0:
-            self.stack = self.mmu.allocate_section("stack", self.conf.stack_size, MemoryFlags(False, False))
-            self.regs.set('sp', self.stack.base + self.stack.size)
-            print(FMT_CPU + '[CPU] Allocated {} bytes of stack'.format(self.stack.size) + FMT_NONE)
+        super().__init__(MMU(), instruction_sets, conf)

-        print(FMT_CPU + '[CPU] Started running from 0x{:08X} ({})'.format(le.run_ptr, le.name) + FMT_NONE)
-        self._run()
+        self.exit_code = 0

-    def continue_from_debugger(self, verbose=True):
-        """
-        called from the debugger to continue running
+        # setup syscall interface
+        self.syscall_int = SyscallInterface()

-        :param verbose: If True, will print each executed instruction to STDOUT
-        """
-        self._run(verbose)
+        # add global syscall symbols, but don't overwrite any user-defined symbols
+        syscall_symbols = get_syscall_symbols()
+        syscall_symbols.update(self.mmu.global_symbols)
+        self.mmu.global_symbols.update(syscall_symbols)

-    def step(self):
+    def step(self, verbose=False):
        """
        Execute a single instruction, then return.
        """
-        if self.exit:
+        if self.halted:
            print(FMT_CPU + "[CPU] Program exited with code {}".format(self.exit_code) + FMT_NONE)
-        else:
-            try:
-                self.cycle += 1
-                ins = self.mmu.read_ins(self.pc)
-                print(FMT_CPU + "   Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins))
-                self.pc += self.INS_XLEN
-                self.run_instruction(ins)
-            except LaunchDebuggerException:
-                print(FMT_CPU + "[CPU] Returning to debugger!" + FMT_NONE)
-            except RiscemuBaseException as ex:
-                self.pc -= self.INS_XLEN
-                print(ex.message())
+            return
+
+        launch_debugger = False

-    def _run(self, verbose=False):
-        if self.pc <= 0:
-            return False
-        ins = None
        try:
-            while not self.exit:
-                self.cycle += 1
-                ins = self.mmu.read_ins(self.pc)
-                if verbose:
-                    print(FMT_CPU + "   Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins))
-                self.pc += self.INS_XLEN
-                self.run_instruction(ins)
+            self.cycle += 1
+            ins = self.mmu.read_ins(self.pc)
+            if verbose:
+                print(FMT_CPU + "   Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins))
+            self.pc += self.INS_XLEN
+            self.run_instruction(ins)
        except RiscemuBaseException as ex:
-            if not isinstance(ex, LaunchDebuggerException):
-                print(FMT_ERROR + "[CPU] excpetion caught at 0x{:08X}: {}:".format(self.pc - 1, ins) + FMT_NONE)
+            if isinstance(ex, LaunchDebuggerException):
+                # if a debugger session is already active, re-raise the exception to the caller instead of launching another one
+                if self.debugger_active:
+                    raise ex
+
+                print(FMT_CPU + '[CPU] Debugger launch requested!' + FMT_NONE)
+                launch_debugger = True
+            else:
                print(ex.message())
-                self.pc -= self.INS_XLEN
-
-            if self.active_debug:
-                print(FMT_CPU + "[CPU] Returning to debugger!" + FMT_NONE)
-                return
-            if self.conf.debug_on_exception:
-                launch_debug_session(self, self.mmu, self.regs, "Exception encountered, launching debug:")
-
-        if self.exit:
-            print()
-            print(FMT_CPU + "Program exited with code {}".format(self.exit_code) + FMT_NONE)
-            sys.exit(self.exit_code)
-        else:
-            print()
-            print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE)
-
-    def run_instruction(self, ins: 'LoadedInstruction'):
-        """
-        Execute a single instruction
+                ex.print_stacktrace()
+                print(FMT_CPU + '[CPU] Halting due to exception!' + FMT_NONE)
+                self.halted = True

-        :param ins: The instruction to execute
-        """
-        if ins.name in self.instructions:
-            self.instructions[ins.name](ins)
-        else:
-            # this should never be reached, as unknown instructions are imparseable
-            raise RuntimeError("Unknown instruction: {}".format(ins))
+        if launch_debugger:
+            launch_debug_session(self)

-    def all_instructions(self) -> List[str]:
-        """
-        Return a list of all instructions this CPU can execute.
-        """
-        return list(self.instructions.keys())
+    def run(self, verbose=False):
+        while not self.halted:
+            self.step(verbose)
+
+        print(FMT_CPU + "[CPU] Program exited with code {}".format(self.exit_code) + FMT_NONE)

-    def __repr__(self):
+    def setup_stack(self, stack_size=1024 * 4) -> bool:
        """
-        Returns a representation of the CPU and some of its state.
+        Create program stack and populate stack pointer
+        :param stack_size: the size of the required stack in bytes, defaults to 4 KiB
+        :return: True if the stack section was loaded and sp was set, False if loading the section failed
        """
-        return "{}(pc=0x{:08X}, cycle={}, exit={}, instructions={})".format(
-            self.__class__.__name__,
-            self.pc,
-            self.cycle,
-            self.exit,
-            " ".join(s.name for s in self.instruction_sets)
+        stack_sec = BinaryDataMemorySection(
+            bytearray(stack_size),
+            '.stack',
+            None,  # FIXME: why does a binary data memory section require a context?
+            '',
+            0
        )
+
+        if not self.mmu.load_section(stack_sec, fixed_position=False):
+            return False
+
+        self.regs.set('sp', Int32(stack_sec.base + stack_sec.size))
+        return True
+
+    @classmethod
+    def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
+        return [AssemblyFileLoader]
--- a/riscemu/Executable.py
+++ b/riscemu/Executable.py
@ -1,319 +0,0 @@
-"""
-RiscEmu (c) 2021 Anton Lydike
-
-SPDX-License-Identifier: MIT
-
-This file holds Executable and LoadedExecutable classes as well as loading and some linking code.
-
-FIXME: refactor this code into muliple files
-"""
-
-from dataclasses import dataclass, field
-from typing import Dict, List, Tuple, Union, Optional
-from .Exceptions import *
-from .helpers import *
-from math import log
-
-import typing
-
-if typing.TYPE_CHECKING:
-    from .Tokenizer import RiscVInstructionToken
-
-
-@dataclass(frozen=True)
-class MemoryFlags:
-    read_only: bool
-    executable: bool
-
-    def __repr__(self):
-        return "{}({},{})".format(
-            self.__class__.__name__,
-            'ro' if self.read_only else 'rw',
-            'x' if self.executable else '-'
-        )
-
-
-@dataclass
-class MemorySection:
-    name: str
-    flags: MemoryFlags
-    size: int = 0
-    content: List[bytearray] = field(default_factory=list)
-
-    def add(self, data: bytearray):
-        self.content.append(data)
-        self.size += len(data)
-
-    def continuous_content(self, parent: 'LoadedExecutable'):
-        """
-        converts the content into one continuous bytearray
-        """
-        if self.size == 0:
-            return bytearray(0)
-        content = self.content[0]
-        for b in self.content[1:]:
-            content += b
-        return content
-
-
-@dataclass
-class InstructionMemorySection(MemorySection):
-    content: List['RiscVInstructionToken'] = field(default_factory=list)
-
-    def add_insn(self, insn: 'RiscVInstructionToken'):
-        self.content.append(insn)
-        self.size += 1
-
-    def continuous_content(self, parent: 'LoadedExecutable'):
-        return [
-            LoadedInstruction(ins.instruction, ins.args, parent)
-            for ins in self.content
-        ]
-
-
-@dataclass()
-class Executable:
-    run_ptr: Tuple[str, int]
-    sections: Dict[str, MemorySection]
-    symbols: Dict[str, Tuple[str, int]]
-    exported_symbols: List[str]
-    name: str
-
-    def __repr__(self):
-        return "{}(sections = {}, symbols = {}, run_ptr = {}, globals={})".format(
-            self.__class__.__name__,
-            " ".join(self.sections.keys()),
-            " ".join(self.symbols.keys()),
-            self.run_ptr,
-            ",".join(self.exported_symbols)
-        )
-
-
-### LOADING CODE
-
-
-@dataclass(frozen=True)
-class LoadedInstruction:
-    """
-    An instruction which is loaded into memory. It knows the binary it belongs to to resolve symbols
-    """
-    name: str
-    args: List[str]
-    bin: 'LoadedExecutable'
-
-    def get_imm(self, num: int):
-        """
-        parse and get immediate argument
-        """
-        if len(self.args) <= num:
-            raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args))
-        arg = self.args[num]
-        # look up symbols
-        if self.bin.has_symb(arg):
-            return self.bin.lookup_symbol(arg)
-        return parse_numeric_argument(arg)
-
-    def get_imm_reg(self, num: int):
-        """
-        parse and get an argument imm(reg)
-        """
-        if len(self.args) <= num:
-            raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args))
-        arg = self.args[num]
-        ASSERT_IN("(", arg)
-        imm, reg = arg[:-1].split("(")
-        if self.bin.has_symb(imm):
-            return self.bin.lookup_symbol(imm), reg
-        return parse_numeric_argument(imm), reg
-
-    def get_reg(self, num: int):
-        """
-        parse and get an register argument
-        """
-        if len(self.args) <= num:
-            raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args))
-        return self.args[num]
-
-    def __repr__(self):
-        return "{} {}".format(self.name, ", ".join(self.args))
-
-
-@dataclass(frozen=True)
-class LoadedMemorySection:
-    """
-    A section which is loaded into memory
-    """
-    name: str
-    base: int
-    size: int
-    content: Union[List[LoadedInstruction], bytearray] = field(repr=False)
-    flags: MemoryFlags
-    owner: str
-
-    def read(self, offset: int, size: int):
-        if offset < 0:
-            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, size, 'read')
-        if offset + size > self.size:
-            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
-                                        size, 'read')
-        return self.content[offset: offset + size]
-
-    def read_instruction(self, offset):
-        if not self.flags.executable:
-            raise MemoryAccessException('Section not executable!', self.base + offset, 1, 'read exec')
-
-        if offset < 0:
-            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'read exec')
-        if offset >= self.size:
-            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
-                                        1, 'read exec')
-        return self.content[offset]
-
-    def write(self, offset, size, data):
-        if self.flags.read_only:
-            raise MemoryAccessException('Section not writeable {}'.format(self.name), self.base + offset, size, 'write')
-
-        if offset < 0:
-            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'write')
-        if offset >= self.size:
-            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
-                                        size, 'write')
-
-        for i in range(size):
-            self.content[offset + i] = data[i]
-
-    def dump(self, at_addr=None, fmt='hex', max_rows=10, group=4, bytes_per_row=16, all=False):
-        highlight = -1
-        if at_addr is None:
-            at_addr = self.base
-        else:
-            highlight = at_addr - self.base
-
-        at_off = at_addr - self.base
-        start = max(align_addr(at_off - ((max_rows * bytes_per_row) // 2), 8) - 8, 0)
-        if all:
-            end = self.size
-            start = 0
-        else:
-            end = min(start + (max_rows * bytes_per_row), self.size)
-
-        fmt_str = "    0x{:0" + str(ceil(log(self.base + end, 16))) + "X}:  {}"
-
-        if self.flags.executable:
-            # this section holds instructions!
-            start = 0 if all else max(at_off - (max_rows // 2), 0)
-            end = self.size if all else min(self.size, start + max_rows)
-            print(FMT_MEM + "{}, viewing {} instructions:".format(
-                self, end - start
-            ) + FMT_NONE)
-            for i in range(start, end):
-                if i == highlight:
-                    ins = FMT_UNDERLINE + FMT_ORANGE + repr(self.content[i]) + FMT_NONE
-                else:
-                    ins = repr(self.content[i])
-                print(fmt_str.format(self.base + i, ins))
-        else:
-            print(FMT_MEM + "{}, viewing {} bytes:".format(
-                self, end - start
-            ) + FMT_NONE)
-            for i in range(0, end - start, bytes_per_row):
-                data = self.content[start + i: min(start + i + bytes_per_row, end)]
-                if start + i <= highlight <= start + i + bytes_per_row:
-                    # do hightlight here!
-                    hi_ind = (highlight - start - i) // group
-                    print(fmt_str.format(self.base + start + i, format_bytes(data, fmt, group, highlight=hi_ind)))
-                else:
-                    print(fmt_str.format(self.base + start + i, format_bytes(data, fmt, group)))
-        if end == self.size:
-            print(FMT_MEM + "End of section!" + FMT_NONE)
-        else:
-            print(FMT_MEM + "More bytes ..." + FMT_NONE)
-
-    def __repr__(self):
-        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
-            self.__class__.__name__,
-            self.name,
-            self.base,
-            self.size,
-            self.flags,
-            self.owner
-        )
-
-
-class LoadedExecutable:
-    """
-    This represents an executable which is loaded into memory at address base_addr
-
-    This is basicalle the "loader" in normal system environments
-    It initializes the stack and heap
-
-    It still holds a symbol table, that is not accessible memory since I don't want to deal with
-    binary strings in memory etc.
-    """
-    name: str
-    base_addr: int
-    sections_by_name: Dict[str, LoadedMemorySection]
-    sections: List[LoadedMemorySection]
-    symbols: Dict[str, int]
-    run_ptr: int
-    exported_symbols: Dict[str, int]
-    global_symbol_table: Dict[str, int]
-
-    def __init__(self, exe: Executable, base_addr: int, global_symbol_table: Dict[str, int]):
-        self.name = exe.name
-        self.base_addr = base_addr
-        self.sections = list()
-        self.sections_by_name = dict()
-        self.symbols = dict()
-        self.exported_symbols = dict()
-        self.global_symbol_table = global_symbol_table
-
-        curr = base_addr
-        for sec in exe.sections.values():
-            loaded_sec = LoadedMemorySection(
-                sec.name,
-                curr,
-                sec.size,
-                sec.continuous_content(self),
-                sec.flags,
-                self.name
-            )
-            self.sections.append(loaded_sec)
-            self.sections_by_name[loaded_sec.name] = loaded_sec
-            curr = align_addr(loaded_sec.size + curr)
-
-        for name, (sec_name, offset) in exe.symbols.items():
-            if sec_name == '_static_':
-                self.symbols[name] = offset
-            else:
-                ASSERT_IN(sec_name, self.sections_by_name)
-                self.symbols[name] = self.sections_by_name[sec_name].base + offset
-
-        for name in exe.exported_symbols:
-            self.exported_symbols[name] = self.symbols[name]
-
-        self.size = curr - base_addr
-
-        # translate run_ptr from executable
-        run_ptr_sec, run_ptr_off = exe.run_ptr
-        self.run_ptr = self.sections_by_name[run_ptr_sec].base + run_ptr_off
-
-    def lookup_symbol(self, name):
-        if name in self.symbols:
-            return self.symbols[name]
-        if name in self.global_symbol_table:
-            return self.global_symbol_table[name]
-        raise LinkerException('Symbol {} not found!'.format(name), (self,))
-
-    def __repr__(self):
-        return '{}[{}](base=0x{:08X}, size={}bytes, sections={}, run_ptr=0x{:08X})'.format(
-            self.__class__.__name__,
-            self.name,
-            self.base_addr,
-            self.size,
-            " ".join(self.sections_by_name.keys()),
-            self.run_ptr
-        )
-
-    def has_symb(self, arg):
-        return arg in self.symbols or arg in self.global_symbol_table
--- a/riscemu/ExecutableParser.py
+++ b/riscemu/ExecutableParser.py
@ -1,193 +0,0 @@
-"""
-RiscEmu (c) 2021 Anton Lydike
-
-SPDX-License-Identifier: MIT
-
-This file holds the parser that parses the tokenizer output.
-"""
-
-from .helpers import parse_numeric_argument, int_to_bytes
-from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags
-from .Exceptions import *
-
-from .Tokenizer import RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, RiscVPseudoOpToken
-
-from typing import Dict, Tuple, List, Optional
-
-
-class ExecutableParser:
-    """
-    Parses output form the RiscVTokenizer
-    """
-    tokenizer: 'RiscVTokenizer'
-
-    def __init__(self, tokenizer: 'RiscVTokenizer'):
-        self.instructions: List[RiscVInstructionToken] = list()
-        self.symbols: Dict[str, Tuple[str, int]] = dict()
-        self.sections: Dict[str, MemorySection] = dict()
-        self.tokenizer = tokenizer
-        self.active_section: Optional[str] = None
-        self.implicit_sections = False
-        self.globals: List[str] = list()
-
-    def parse(self) -> Executable:
-        """
-        parse tokenizer output into an executable
-        :return: the parsed executable
-        :raise ParseException: Raises a ParseException when invalid input is read
-        """
-        for token in self.tokenizer.tokens:
-            if isinstance(token, RiscVInstructionToken):
-                self.parse_instruction(token)
-            elif isinstance(token, RiscVSymbolToken):
-                self.handle_symbol(token)
-            elif isinstance(token, RiscVPseudoOpToken):
-                self.handle_pseudo_op(token)
-        return self._get_execuable()
-
-    def _get_execuable(self) -> Executable:
-        start_ptr = ('text', 0)
-        if '_start' in self.symbols:
-            start_ptr = self.symbols['_start']
-        elif 'main' in self.symbols:
-            start_ptr = self.symbols['main']
-        return Executable(start_ptr, self.sections, self.symbols, self.globals, self.tokenizer.name)
-
-    def parse_instruction(self, ins: 'RiscVInstructionToken') -> None:
-        """
-        parses an Instruction token
-        :param ins: the instruction token
-        """
-        if self.active_section is None:
-            self.op_text()
-            self.implicit_sections = True
-
-        ASSERT_EQ(self.active_section, 'text')
-        sec = self._curr_sec()
-        if isinstance(sec, InstructionMemorySection):
-            sec.add_insn(ins)
-        else:
-            raise ParseException("SHOULD NOT BE REACHED")
-
-    def handle_symbol(self, token: 'RiscVSymbolToken'):
-        """
-        Handle a symbol token (such as 'main:')
-        :param token: the symbol token
-        """
-        ASSERT_NOT_IN(token.name, self.symbols)
-        ASSERT_NOT_NULL(self.active_section)
-        sec_pos = self._curr_sec().size
-        self.symbols[token.name] = (self.active_section, sec_pos)
-
-    def handle_pseudo_op(self, op: 'RiscVPseudoOpToken'):
-        """
-        Handle a pseudo op token (such as '.word 0xffaabbcc')
-        :param op: the peseudo-op token
-        """
-        name = 'op_' + op.name
-        if hasattr(self, name):
-            getattr(self, name)(op)
-        else:
-            raise ParseException("Unknown pseudo op: {}".format(op), (op,))
-
-    ## Pseudo op implementations:
-    def op_section(self, op: 'RiscVPseudoOpToken'):
-        """
-        handles a .section token
-        :param op: The token
-        """
-        ASSERT_LEN(op.args, 1)
-        name = op.args[0][1:]
-        ASSERT_IN(name, ('data', 'rodata', 'text'))
-        getattr(self, 'op_' + name)(op)
-
-    def op_text(self, op: 'RiscVPseudoOpToken' = None):
-        """
-        handles a .text token
-        :param op: The token
-        """
-        self._set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection)
-
-    def op_data(self, op: 'RiscVPseudoOpToken' = None):
-        """
-        handles a .data token
-        :param op: The token
-        """
-        self._set_sec('data', MemoryFlags(read_only=False, executable=False))
-
-    def op_rodata(self, op: 'RiscVPseudoOpToken' = None):
-        """
-        handles a .rodata token
-        :param op: The token
-        """
-        self._set_sec('rodata', MemoryFlags(read_only=True, executable=False))
-
-    def op_space(self, op: 'RiscVPseudoOpToken'):
-        """
-        handles a .space token. Inserts empty space into the current (data or rodata) section
-        :param op: The token
-        """
-        ASSERT_IN(self.active_section, ('data', 'rodata'))
-        ASSERT_LEN(op.args, 1)
-        size = parse_numeric_argument(op.args[0])
-        self._curr_sec().add(bytearray(size))
-
-    def op_ascii(self, op: 'RiscVPseudoOpToken'):
-        """
-        handles a .ascii token. Inserts ascii encoded text into the currrent data section
-        :param op: The token
-        """
-        ASSERT_IN(self.active_section, ('data', 'rodata'))
-        ASSERT_LEN(op.args, 1)
-        str = op.args[0][1:-1].encode('ascii').decode('unicode_escape')
-        self._curr_sec().add(bytearray(str, 'ascii'))
-
-    def op_asciiz(self, op: 'RiscVPseudoOpToken'):
-        """
-        handles a .ascii token. Inserts nullterminated ascii encoded text into the currrent data section
-        :param op: The token
-        """
-        ASSERT_IN(self.active_section, ('data', 'rodata'))
-        ASSERT_LEN(op.args, 1)
-        str = op.args[0][1:-1].encode('ascii').decode('unicode_escape')
-        self._curr_sec().add(bytearray(str + '\0', 'ascii'))
-
-    def op_global(self, op: 'RiscVPseudoOpToken'):
-        """
-        handles a .global token. Marks the token as global
-        :param op: The token
-        """
-        ASSERT_LEN(op.args, 1)
-        name = op.args[0]
-        self.globals.append(name)
-
-    def op_set(self, op: 'RiscVPseudoOpToken'):
-        """
-        handles a .set name, val token. Sets the symbol name to val
-        :param op: The token
-        """
-        ASSERT_LEN(op.args, 2)
-        name = op.args[0]
-        val = parse_numeric_argument(op.args[1])
-        self.symbols[name] = ('_static_', val)
-
-    def op_align(self, op: 'RiscVPseudoOpToken'):
-        """
-        handles an align token. Currently a nop (just not implemented fully yet, as linker handles most alignement tasks)
-        :param op: The token
-        """
-        pass
-
-    def op_word(self, op: 'RiscVPseudoOpToken'):
-        ASSERT_LEN(op.args, 1)
-        val = parse_numeric_argument(op.args[0])
-        self._curr_sec().add(int_to_bytes(val, 4))
-
-    ## Section handler code
-    def _set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection):
-        if name not in self.sections:
-            self.sections[name] = cls(name, flags)
-        self.active_section = name
-
-    def _curr_sec(self):
-        return self.sections[self.active_section]
--- a/riscemu/IO/IOModule.py
+++ b/riscemu/IO/IOModule.py
@ -1,22 +1,22 @@
 from abc import ABC, abstractmethod
+from typing import Optional

+from riscemu.types import MemorySection, MemoryFlags, T_RelativeAddress

-class IOModule(ABC):
-    addr: int
-    size: int

-    def __init__(self, addr: int, size: int):
-        self.addr = addr
-        self.size = size
+class IOModule(MemorySection, ABC):
+    def __init__(self, name: str, flags: MemoryFlags, size: int, owner: str = 'system', base: int = 0):
+        super(IOModule, self).__init__(name, flags, size, base, owner, None)

-    @abstractmethod
-    def read(self, addr: int, size: int):
-        pass
+    def contains(self, addr, size: int = 0):
+        return self.base <= addr < self.base + self.size and \
+               self.base <= addr + size <= self.base + self.size

-    @abstractmethod
-    def write(self, addr: int, data: bytearray, size: int):
-        pass
+    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
+             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
+        print(self)

-    def contains(self, addr, size: int = 0):
-        return self.addr <= addr < self.addr + self.size and \
-               self.addr <= addr + size <= self.addr + self.size
+    def __repr__(self):
+        return "{}[{}] at 0x{:0X} (size={}bytes, flags={})".format(
+            self.__class__.__name__, self.name, self.base, self.size, self.flags
+        )
--- a/riscemu/IO/TextIO.py
+++ b/riscemu/IO/TextIO.py
@ -1,70 +1,28 @@
 from .IOModule import IOModule
 from ..priv.Exceptions import InstructionAccessFault
-from ..helpers import int_from_bytes
-from threading import Thread
-import time
+from ..types import T_RelativeAddress, Instruction, MemoryFlags, Int32


-def _window_loop(textIO: 'TextIO'):
-    try:
-        import PySimpleGUI as sg
-
-        logs = sg.Text(font="monospace")
-        col = sg.Column([[logs]], size=(640, 400), scrollable=True)
-        window = sg.Window("TextIO:{:x}".format(textIO.addr), [[col]])
-        lines = list()
-
-        window.finalize()
-        textIO.set_sg_window(window)
-        while True:
-            e, v = window.read()
-            if e == sg.WINDOW_CLOSED:
-                window.close()
-                textIO.set_sg_window(None)
-                break
-            if e == 'putlog':
-                lines.insert(0, v[0])
-                logs.update(value='\n'.join(lines) + '\n')
-                col.contents_changed()
-
-    except ImportError:
-        print("[TextIO] window disabled - please install PySimpleGui!")
-        textIO.set_sg_window(None)
-
 class TextIO(IOModule):
-    def __init__(self, addr: int, buflen: int = 128):
-        super(TextIO, self).__init__(addr, buflen + 4)
+    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
+        raise InstructionAccessFault(self.base + offset)
+
+    def __init__(self, base: int, buflen: int = 128):
+        super(TextIO, self).__init__('TextIO', MemoryFlags(False, False), buflen + 4, base=base)
        self.buff = bytearray(buflen)
        self.current_line = ""
-        self.sg_window = None
-        self.start_buffer = list()
-
-        self.thread = Thread(target=_window_loop, args=(self,))
-        self.thread.start()
-        time.sleep(0.1)
-
-    def set_sg_window(self, window):
-        if self.sg_window is not None and window is not None:
-            raise Exception("cannot set window twice!")
-        self.sg_window = window
-
-        buff = self.start_buffer
-        self.start_buffer = None if window is None else list()
-
-        for line in buff:
-            self._present(line)

    def read(self, addr: int, size: int) -> bytearray:
-        raise InstructionAccessFault(addr)
+        raise InstructionAccessFault(self.base + addr)

-    def write(self, addr: int, data: bytearray, size: int):
-        if addr == self.addr:
+    def write(self, addr: int, size: int, data: bytearray):
+        """
+        Write to the device
+
+        The first four bytes form the control word: writing a non-zero value to offset 0 calls _print().
+        Every other write is copied into the text buffer, which starts at offset 4.
+
+        :param addr: The offset of the write, relative to the start of this section
+        :param size: The number of bytes to write
+        :param data: The bytes to write
+        :raises InstructionAccessFault: If more than four bytes are written to the control word
+        """
+        if addr == 0:
            if size > 4:
-                raise InstructionAccessFault(addr)
-            if int_from_bytes(data[0:4]) > 0:
+                # addr is section-relative, report the absolute address just like read() and read_ins() do
+                raise InstructionAccessFault(self.base + addr)
+            if Int32(data) != 0:
                self._print()
-                return
-        buff_start = addr - self.addr - 4
+            # a write to the control word must never reach the buffer: with buff_start == -4 the slice
+            # assignment below would insert bytes into self.buff (and grow it) instead of overwriting
+            return
+        buff_start = addr - 4
        self.buff[buff_start:buff_start + size] = data[0:size]

    def _print(self):
@ -83,10 +41,4 @@ class TextIO(IOModule):
            self.current_line += text

    def _present(self, text: str):
-        if self.sg_window is not None:
-            self.sg_window.write_event_value('putlog', text)
-        elif self.start_buffer is not None:
-            self.start_buffer.append(text)
-        else:
-            print("[TextIO:{:x}] {}".format(self.addr, text))
-
+        print("[TextIO:{:x}] {}".format(self.base, text))
--- a/riscemu/MMU.py
+++ b/riscemu/MMU.py
@ -4,17 +4,20 @@ RiscEmu (c) 2021 Anton Lydike
 SPDX-License-Identifier: MIT
 """

-from .Config import RunConfig
-from .Executable import Executable, LoadedExecutable, LoadedMemorySection, LoadedInstruction, MemoryFlags
-from .helpers import align_addr, int_from_bytes
-from .Exceptions import OutOfMemoryException, InvalidAllocationException
+from typing import Dict, List, Optional, Union
+
 from .colors import *
-from typing import Dict, List, Tuple, Optional
+from .helpers import align_addr
+from .types import Instruction, MemorySection, MemoryFlags, T_AbsoluteAddress, \
+    Program, InstructionContext, Int32
+from .types.exceptions import InvalidAllocationException, MemoryAccessException


 class MMU:
    """
-    The MemoryManagementUnit (handles loading binaries, and reading/writing data)
+    The MemoryManagementUnit. This provides a unified interface for reading/writing data from/to memory.
+
+    It also provides various translations for addresses.
    """

    max_size = 0xFFFFFFFF
@ -27,19 +30,14 @@ class MMU:
    No single allocation can be bigger than 64 MB
    """

-    sections: List[LoadedMemorySection]
+    sections: List[MemorySection]
    """
    A list of all loaded memory sections
    """

-    binaries: List[LoadedExecutable]
-    """
-    A list of all loaded executables
-    """
-
-    last_bin: Optional[LoadedExecutable] = None
+    programs: List[Program]
    """
-    The last loaded executable (the next executable is inserted directly after this one)
+    A list of all loaded programs
    """

    global_symbols: Dict[str, int]
@ -47,79 +45,15 @@ class MMU:
    The global symbol table
    """

-    last_ins_sec: Optional[LoadedMemorySection]
-
-    def __init__(self, conf: RunConfig):
-        """
-        Create a new MMU, respecting the active RunConfiguration
-
-        :param conf: The config to respect
-        """
-        self.sections: List[LoadedMemorySection] = list()
-        self.binaries: List[LoadedExecutable] = list()
-        self.first_free_addr: int = 0x100
-        self.conf: RunConfig = conf
-        self.global_symbols: Dict[str, int] = dict()
-        self.last_ins_sec = None
-
-    def load_bin(self, exe: Executable) -> LoadedExecutable:
-        """
-        Load an executable into memory
-
-        :param exe: the executable to load
-        :return: A LoadedExecutable
-        :raises OutOfMemoryException: When all memory is used
+    def __init__(self):
        """
-
-        # align to 8 byte word
-        addr = align_addr(self.first_free_addr)
-
-        loaded_bin = LoadedExecutable(exe, addr, self.global_symbols)
-
-        if loaded_bin.size + addr > self.max_size:
-            raise OutOfMemoryException('load of executable')
-
-        self.binaries.append(loaded_bin)
-        self.first_free_addr = loaded_bin.base_addr + loaded_bin.size
-
-        # read sections into sec dict
-        for sec in loaded_bin.sections:
-            self.sections.append(sec)
-
-        self.global_symbols.update(loaded_bin.exported_symbols)
-
-        print(FMT_MEM + "[MMU] Successfully loaded{}: {}".format(FMT_NONE, loaded_bin))
-
-        return loaded_bin
-
-    def allocate_section(self, name: str, req_size: int, flag: MemoryFlags):
+        Create a new MMU
        """
-        Used to allocate a memory region (data only). Use `load_bin` if you want to load a binary, this is used for
-        stack and maybe malloc in the future.
+        self.programs = list()
+        self.sections = list()
+        self.global_symbols = dict()

-        :param name: Name of the section to allocate
-        :param req_size: The requested size
-        :param flag: The flags protecting this memory section
-        :return: The LoadedMemorySection
-        """
-        if flag.executable:
-            raise InvalidAllocationException('cannot allocate executable section', name, req_size, flag)
-
-        if req_size < 0:
-            raise InvalidAllocationException('Invalid size request', name, req_size, flag)
-
-        if req_size > self.max_alloc_size:
-            raise InvalidAllocationException('Cannot allocate more than {} bytes at a time'.format(self.max_alloc_size),
-                                             name, req_size, flag)
-
-        base = align_addr(self.first_free_addr)
-        size = align_addr(req_size)
-        sec = LoadedMemorySection(name, base, size, bytearray(size), flag, "<runtime>")
-        self.sections.append(sec)
-        self.first_free_addr = base + size
-        return sec
-
-    def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]:
+    def get_sec_containing(self, addr: T_AbsoluteAddress) -> Optional[MemorySection]:
        """
        Returns the section that contains the address addr

@ -131,31 +65,27 @@ class MMU:
                return sec
        return None

-    def get_bin_containing(self, addr: int) -> Optional[LoadedExecutable]:
-        for exe in self.binaries:
-            if exe.base_addr <= addr < exe.base_addr + exe.size:
-                return exe
+    def get_bin_containing(self, addr: T_AbsoluteAddress) -> Optional[Program]:
+        for program in self.programs:
+            if program.base <= addr < program.base + program.size:
+                return program
        return None

-    def read_ins(self, addr: int) -> LoadedInstruction:
+    def read_ins(self, addr: T_AbsoluteAddress) -> Instruction:
        """
        Read a single instruction located at addr

        :param addr: The location
        :return: The Instruction
        """
-        sec = self.last_ins_sec
-        if sec is not None and sec.base <= addr < sec.base + sec.size:
-            return sec.read_instruction(addr - sec.base)
        sec = self.get_sec_containing(addr)
-        self.last_ins_sec = sec
        if sec is None:
-            print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! "
-                            "Have you forgotten an exit syscall or ret statement?" + FMT_NONE)
+            print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! (read at {}) ".format(addr)
+                  + "Have you forgotten an exit syscall or ret statement?" + FMT_NONE)
            raise RuntimeError("No next instruction available!")
-        return sec.read_instruction(addr - sec.base)
+        return sec.read_ins(addr - sec.base)

-    def read(self, addr: int, size: int) -> bytearray:
+    def read(self, addr: Union[int, Int32], size: int) -> bytearray:
        """
        Read size bytes of memory at addr

@ -163,10 +93,16 @ class MMU:
        :param size: The number of bytes to read
        :return: The bytearray at addr
        """
+        if isinstance(addr, Int32):
+            # sections are looked up by plain unsigned integer addresses
+            addr = addr.unsigned_value
        sec = self.get_sec_containing(addr)
+        if sec is None:
+            print(FMT_MEM + "[MMU] Trying to read data form invalid region at 0x{:x}! ".format(addr) + FMT_NONE)
+            raise MemoryAccessException("region is non-initialized!", addr, size, 'read')
        return sec.read(addr - sec.base, size)

-    def write(self, addr: int, size: int, data):
+    def write(self, addr: int, size: int, data: bytearray):
        """
        Write bytes into memory

@ -176,8 +112,8 @@ class MMU:
        """
        sec = self.get_sec_containing(addr)
        if sec is None:
-            print(FMT_MEM + '[MMU] Invalid write into non-initialized section at 0x{:08X}'.format(addr) + FMT_NONE)
-            raise RuntimeError("No write pls")
+            print(FMT_MEM + '[MMU] Invalid write into non-initialized region at 0x{:08X}'.format(addr) + FMT_NONE)
+            raise MemoryAccessException("region is non-initialized!", addr, size, 'write')

        return sec.write(addr - sec.base, size, data)

@ -195,7 +131,7 @@ class MMU:
            return
        sec.dump(addr, *args, **kwargs)

-    def symbol(self, symb: str):
+    def label(self, symb: str):
        """
        Look up the symbol symb in all local symbol tables (and the global one)

@ -204,14 +140,152 @@ class MMU:
        print(FMT_MEM + "[MMU] Lookup for symbol {}:".format(symb) + FMT_NONE)
        if symb in self.global_symbols:
            print("   Found global symbol {}: 0x{:X}".format(symb, self.global_symbols[symb]))
-        for b in self.binaries:
-            if symb in b.symbols:
-                print("   Found local symbol {}: 0x{:X} in {}".format(symb, b.symbols[symb], b.name))
+        for bin in self.programs:
+            if symb in bin.context.labels:
+                print("   Found local labels {}: 0x{:X} in {}".format(symb, bin.context.labels[symb], bin.name))
+
+    def read_int(self, addr: int) -> Int32:
+        """
+        Read a four byte word from memory
+
+        :param addr: The address of the word
+        :return: The four bytes at addr, wrapped in an Int32
+        """
+        raw_word = self.read(addr, 4)
+        return Int32(raw_word)
+
+    def translate_address(self, address: T_AbsoluteAddress) -> str:
+        """
+        Describe an address relative to the closest label that is located at or before it
+
+        Only the labels of the context of the section containing the address are considered. If several labels
+        share the closest address, ELF marker symbols are replaced by any other label, and names of sections of
+        the containing program are replaced by any label that is not an ELF marker.
+
+        :param address: The address to describe
+        :return: "<owner>:<section> at <label> (<label address>) + <offset>", or "unknown at <address>" if the
+                 address is in no section or no label is located at or before it
+        """
+        sec = self.get_sec_containing(address)
+        if not sec:
+            return "unknown at 0x{:0x}".format(address)
+
+        program = self.get_bin_containing(address)
+        section_names = set(s.name for s in program.sections) if program else set()
+        elf_markers = {
+            '__global_pointer$', '_fdata', '_etext', '_gp',
+            '_bss_start', '_bss_end', '_ftext', '_edata', '_end', '_fbss'
+        }
+
+        best_name, best_val = '', None
+        for name, val in sec.context.labels.items():
+            # labels behind the address cannot describe it
+            if val > address:
+                continue
+            if best_val is None or val > best_val:
+                # strictly closer to the address than every label seen so far
+                best_name, best_val = name, val
+            elif val == best_val:
+                # same distance: try to end up with the most meaningful name
+                if best_name in elf_markers:
+                    best_name = name
+                elif best_name in section_names and name not in elf_markers:
+                    best_name = name
+
+        if not best_name:
+            return "unknown at 0x{:0x}".format(address)
+
+        return '{}:{} at {} (0x{:0x}) + 0x{:0x}'.format(
+            sec.owner, sec.name, best_name, best_val, address - best_val
+        )
+
+    def has_continous_free_region(self, start: int, end: int) -> bool:
+        """
+        Check that no loaded section overlaps the region from start (inclusive) to end (exclusive)
+
+        Only the first section that does not end before start is inspected, so this relies on self.sections
+        being sorted by base address (see _update_state).
+
+        :param start: The first address of the region
+        :param end: The first address behind the region
+        :return: True if the region is free, False otherwise
+        """
+        # if we have no sections we are all good
+        if len(self.sections) == 0:
+            return True
+        # if the last section is located before the start we are also good
+        if start >= self.sections[-1].base + self.sections[-1].size:
+            return True

-    def read_int(self, addr: int) -> int:
-        return int_from_bytes(self.read(addr, 4))
+        for sec in self.sections:
+            # skip all sections that end before the required start point
+            if sec.base + sec.size <= start:
+                continue
+            # we now have the first section that doesn't end **before** the start point
+            # if this section starts after the specified end, we are good
+            if sec.base >= end:
+                return True
+            # otherwise we can't continue
+            return False
+        # if all sections end before the requested start we are good
+        # technically we shouldn't ever reach this point, but better safe than sorry
+        return True
+
+    def load_program(self, program: Program, align_to: int = 4):
+        """
+        Load a program into memory and register its sections and global labels
+
+        If program.base is set, the program is placed exactly there. Otherwise the load address is the first
+        address behind the last loaded section, passed through align_addr with align_to.
+
+        :param program: The program to load
+        :param align_to: The alignment handed to align_addr when the MMU chooses the load address
+        :raises InvalidAllocationException: If the program requests a base address and that region is occupied
+        """
+        if program.base is not None:
+            end = program.base + program.size
+            if not self.has_continous_free_region(program.base, end):
+                print(FMT_MEM + "Cannot load program {} into desired space (0x{:0x}-0x{:0x}), area occupied.".format(
+                    program.name, program.base, end
+                ) + FMT_NONE)
+                # the message has no placeholders, so it is passed as-is instead of through str.format
+                raise InvalidAllocationException(
+                    "Area occupied", program.name, program.size, MemoryFlags(False, True)
+                )
+
+            at_addr = program.base
+        else:
+            at_addr = align_addr(self.get_guaranteed_free_address(), align_to)
+
+        # trigger the load event to set all addresses in the binary
+        program.loaded_trigger(at_addr)
+
+        # add program and sections to internal state
+        self.programs.append(program)
+        self.sections += program.sections
+        self._update_state()
+
+        # load all global symbols from program
+        self.global_symbols.update(
+            {key: program.context.labels[key] for key in program.global_labels}
+        )
+        # inject reference to global symbol table into program context
+        # FIXME: this is pretty unclean and should probably be solved in a better way in the future
+        program.context.global_symbol_dict = self.global_symbols
+
+    def load_section(self, sec: MemorySection, fixed_position: bool = False) -> bool:
+        """
+        Add a single section to memory
+
+        :param sec: The section to add
+        :param fixed_position: If True, the section is kept at sec.base and only added if that region is free.
+                               If False, sec.base is overwritten with the next free address, aligned to 8.
+        :return: True if the section was added, False if the requested fixed position is occupied
+        """
+        if fixed_position:
+            if not self.has_continous_free_region(sec.base, sec.base + sec.size):
+                # reset the color afterwards, like every other message printed by the MMU
+                print(FMT_MEM + '[MMU] Cannot place section {} at {}, space is occupied!'.format(sec, sec.base)
+                      + FMT_NONE)
+                return False
+        else:
+            sec.base = align_addr(self.get_guaranteed_free_address(), 8)
+
+        self.sections.append(sec)
+        self._update_state()
+        # both placement strategies report success explicitly
+        return True
+
+    def _update_state(self):
+        """
+        Re-establish the ordering of the internal lists after a program or section was added: programs and
+        sections are both sorted by ascending base address
+        """
+        def base_of(entry):
+            return entry.base
+
+        self.programs.sort(key=base_of)
+        self.sections.sort(key=base_of)
+
+    def get_guaranteed_free_address(self) -> T_AbsoluteAddress:
+        """
+        Get the first address behind the last entry of self.sections
+
+        :return: 0x100 if no section is loaded, otherwise base + size of the last section in the list
+        """
+        if not self.sections:
+            return 0x100
+        last = self.sections[-1]
+        return last.base + last.size

    def __repr__(self):
+        # the class name, followed by the repr of every loaded program on its own tab-indented line
-        return "MMU(\n\t{}\n)".format(
-            "\n\t".join(repr(x) for x in self.sections)
+        return "{}(\n\t{}\n)".format(
+            self.__class__.__name__,
+            "\n\t".join(repr(x) for x in self.programs)
        )
+
+    def context_for(self, addr: T_AbsoluteAddress) -> InstructionContext:
+        """
+        Get the instruction context that belongs to an address
+
+        :param addr: The address to look up
+        :return: The context of the section containing addr, or a newly created InstructionContext if no
+                 section contains addr
+        """
+        sec = self.get_sec_containing(addr)
+        return InstructionContext() if sec is None else sec.context
+
+    def report_addr(self, addr: T_AbsoluteAddress):
+        """
+        Print the section containing an address, its owning program (if loaded) and the offset into the section
+
+        :param addr: The address to report on
+        """
+        sec = self.get_sec_containing(addr)
+        if not sec:
+            print("addr is in no section!")
+            return
+        owner = next((program for program in self.programs if program.name == sec.owner), None)
+        if owner is not None:
+            print("owned by: {}".format(owner))
+
+        # describe the address relative to the start of its section
+        print("{}: 0x{:0x} + 0x{:0x}".format(sec.name, sec.base, addr - sec.base))
--- a/riscemu/Tokenizer.py
+++ b/riscemu/Tokenizer.py
@ -1,320 +0,0 @@
-"""
-RiscEmu (c) 2021 Anton Lydike
-
-SPDX-License-Identifier: MIT
-"""
-
-import re
-from enum import IntEnum
-from typing import List
-
-from .Exceptions import ParseException
-
-PSEUDO_OPS = [
-    '.asciiz',
-    '.double',
-    '.extern',
-    '.global',
-    '.align',
-    '.float',
-    '.kdata',
-    '.ktext',
-    '.space',
-    '.ascii',
-    '.byte',
-    '.data',
-    '.half',
-    '.text',
-    '.word',
-    '.set',
-]
-
-COMMENT_START = ["#", ";"]
-
-REG_VALID_SYMBOL_LABEL = re.compile(r'^([A-z_.][A-z_0-9.]*[A-z_0-9]|[A-z_]):')
-
-REG_WHITESPACE_UNTIL_NEWLINE = re.compile(r'^(\s*)\n')
-
-REG_WHITESPACE = re.compile(r'^\s*')
-
-REG_NONWHITESPACE = re.compile(r'^[^\s]*')
-
-REG_UNTIL_NEWLINE = re.compile(r'^[^\n]*')
-
-REG_WHITESPACE_NO_LINEBREAK = re.compile(r'^[ \t]*')
-
-REG_VALID_ARGUMENT = re.compile(
-    r'^([+-]?(0x[0-9A-f]+|[0-9]+)|[A-z_.][A-z0-9_.]*[A-z_0-9]|[A-z_])(\(([A-z_.][A-z_0-9.]*[A-z_0-9]|[A-z_])\))?'
-)
-
-REG_ARG_SPLIT = re.compile(r'^,[ \t]*')
-
-
-def split_accepting_quotes(string, at=REG_ARG_SPLIT, quotes=('"', "'")):
-    pos = 0
-    last_piece = 0
-    pieces = []
-    in_quotes = False
-    if string is None:
-        return pieces
-    while pos < len(string):
-        match = at.match(string[pos:])
-        if match is not None:
-            if not in_quotes:
-                pieces.append(string[last_piece:pos])
-                pos += len(match.group(0))
-                last_piece = pos
-            else:
-                pos += len(match.group(0))
-        elif string[pos] in quotes:
-            in_quotes = not in_quotes
-            pos += 1
-        elif string[pos] in COMMENT_START and not in_quotes:  # entering comment
-            break
-        else:
-            pos += 1
-    if in_quotes:
-        print("[Tokenizer.split] unbalanced quotes in \"{}\"!".format(string))
-    pieces.append(string[last_piece:pos])
-    return pieces
-
-
-class RiscVInput:
-    """
-    Represents an Assembly file
-    """
-    def __init__(self, content: str, name: str):
-        self.content = content
-        self.pos = 0
-        self.len = len(content)
-        self.name = name
-
-    @staticmethod
-    def from_file(src: str):
-        with open(src, 'r') as f:
-            return RiscVInput(f.read(), src)
-
-    def peek(self, offset: int = 0, size: int = 1, regex: re.Pattern = None, text: str = None, regex_group: int = 0):
-        at = self.pos + offset
-
-        if regex:
-            if not isinstance(regex, re.Pattern):
-                print("uncompiled regex passed to peek!")
-                regex = re.compile(regex)
-            match = regex.match(self.content[at:])
-            if match is None:
-                return None
-
-            if regex_group != 0 and not match.group(0).startswith(match.group(regex_group)):
-                print("Cannot peek regex group that does not start at match start!")
-                return None
-            return match.group(regex_group)
-        if text:
-            if self.content[at:].startswith(text):
-                return self.content[at:at + len(text)]
-            return False
-        return self.content[at:at + size]
-
-    def peek_one_of(self, options: List[str]):
-        longest_peek = 0
-        ret = False
-        for text in options:
-            if self.peek(text=text):
-                if len(text) > longest_peek:
-                    longest_peek = len(text)
-                    ret = text
-        return ret
-
-    def consume(self, size: int = 1, regex: re.Pattern = None, text: str = None, regex_group: int = 0):
-        at = self.pos
-
-        if regex:
-            if not isinstance(regex, re.Pattern):
-                print("uncompiled regex passed to peek!")
-                regex = re.compile(regex)
-            match = regex.match(self.content[at:])
-            if match is None:
-                return None
-
-            if regex_group != 0 and not match.group(0).startswith(match.group(regex_group)):
-                print("Cannot consume regex group that does not start at match start!")
-                return None
-            self.pos += len(match.group(regex_group))
-            return match.group(regex_group)
-
-        if text:
-            if self.content[at:].startswith(text):
-                self.pos += len(text)
-                return text
-            return None
-
-        self.pos += size
-        return self.content[at:at + size]
-
-    def consume_one_of(self, options: List[str]):
-        longest_peek = 0
-        ret = False
-        for text in options:
-            if self.peek(text=text):
-                if len(text) > longest_peek:
-                    longest_peek = len(text)
-                    ret = text
-        self.consume(text=ret)
-        return ret
-
-    def seek_newline(self):
-        return self.consume(regex=REG_WHITESPACE_UNTIL_NEWLINE, regex_group=1)
-
-    def consume_whitespace(self, linebreak=True):
-        if linebreak:
-            return self.consume(regex=REG_WHITESPACE)
-        return self.consume(regex=REG_WHITESPACE_NO_LINEBREAK)
-
-    def has_next(self):
-        return self.pos < self.len
-
-    def context(self, size: int = 5):
-        """
-        returns a context string:
-        <local input before pos>|<local input after pos>
-        """
-        start = max(self.pos - size, 0)
-        end = min(self.pos + size, self.len - 1)
-
-        return self.content[start:self.pos] + '|' + self.content[self.pos:end]
-
-
-class TokenType(IntEnum):
-    SYMBOL = 0
-    INSTRUCTION = 1
-    PSEUDO_OP = 2
-
-    def __repr__(self):
-        return self.name
-
-    def __str__(self):
-        return self.name
-
-
-class RiscVToken:
-    type: TokenType
-
-    def __init__(self, t_type: TokenType):
-        self.type = t_type
-
-    def __repr__(self):
-        return "{}[{}]({})".format(self.__class__.__name__, self.type, self.text())
-
-    def text(self):
-        """
-        create text representation of instruction
-        """
-        return "unknown"
-
-
-class RiscVInstructionToken(RiscVToken):
-    def __init__(self, name, args):
-        super().__init__(TokenType.INSTRUCTION)
-        self.instruction = name
-        self.args = args
-
-    def text(self):
-        if len(self.args) == 0:
-            return self.instruction
-        if len(self.args) == 1:
-            return "{} {}".format(self.instruction, self.args[0])
-        if len(self.args) == 2:
-            return "{} {}, {}".format(self.instruction, *self.args)
-        return "{} {}, {}, {}".format(self.instruction, *self.args)
-
-
-class RiscVSymbolToken(RiscVToken):
-    def __init__(self, name):
-        super().__init__(TokenType.SYMBOL)
-        self.name = name
-
-    def text(self):
-        return self.name
-
-
-class RiscVPseudoOpToken(RiscVToken):
-    def __init__(self, name, args):
-        super().__init__(TokenType.PSEUDO_OP)
-        self.name = name
-        self.args = args
-
-    def text(self):
-        return "{} {}".format(self.name, self.args)
-
-
-class RiscVTokenizer:
-    """
-    A tokenizer for the RISC-V syntax of a given CPU
-    """
-    def __init__(self, input_assembly: RiscVInput, instructions: List[str]):
-        self.input = input_assembly
-        self.tokens: List[RiscVToken] = []
-        self.name = input_assembly.name
-        self.instructions = instructions
-
-    def tokenize(self):
-        while self.input.has_next():
-            # remove leading whitespaces, place cursor at text start
-            self.input.consume_whitespace()
-
-            # check if we have a pseudo op
-            if self.input.peek_one_of(PSEUDO_OPS):
-                self.parse_pseudo_op()
-
-            # check if we have a symbol (like main:)
-            elif self.input.peek(regex=REG_VALID_SYMBOL_LABEL):
-                self.parse_symbol()
-
-            # comment
-            elif self.input.peek() in COMMENT_START:
-                self.parse_comment()
-
-            # must be instruction
-            elif self.input.peek_one_of(self.instructions):
-                self.parse_instruction()
-            else:
-                token = self.input.peek(size=5)
-                raise ParseException("Unknown token around {} at: {}".format(repr(token), repr(self.input.context())))
-            self.input.consume_whitespace()
-
-    def parse_pseudo_op(self):
-        name = self.input.consume_one_of(PSEUDO_OPS)
-        self.input.consume_whitespace(linebreak=False)
-
-        arg_str = self.input.consume(regex=REG_UNTIL_NEWLINE)
-        if not arg_str:
-            args = []
-        else:
-            args = split_accepting_quotes(arg_str)
-
-        self.tokens.append(RiscVPseudoOpToken(name[1:], args))
-
-    def parse_symbol(self):
-        name = self.input.consume(regex=REG_VALID_SYMBOL_LABEL)
-        self.tokens.append(RiscVSymbolToken(name[:-1]))
-        if not self.input.consume_whitespace():
-            print("[Tokenizer] symbol declaration should always be followed by whitespace (at {})!".format(
-                self.input.context()))
-
-    def parse_instruction(self):
-        ins = self.input.consume_one_of(self.instructions)
-        args = []
-        self.input.consume_whitespace(linebreak=False)
-        while self.input.peek(regex=REG_VALID_ARGUMENT) and len(args) < 3:
-            arg = self.input.consume(regex=REG_VALID_ARGUMENT)
-            args.append(arg)
-            if self.input.peek(text=','):
-                self.input.consume(text=',')
-                self.input.consume_whitespace(linebreak=False)
-            else:
-                break
-        self.tokens.append(RiscVInstructionToken(ins, args))
-
-    def parse_comment(self):
-        # just consume the rest
-        self.input.consume(regex=REG_UNTIL_NEWLINE)
--- a/riscemu/init.py
+++ b/riscemu/init.py
@ -8,24 +8,21 @@ This package aims at providing an all-round usable RISC-V emulator and debugger
 It contains everything needed to run assembly files, so you don't need any custom compilers or toolchains
 """

-from .Exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \
+from .types.exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \
    ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException

-from .Tokenizer import RiscVInput, RiscVTokenizer
-
-from .Executable import Executable, LoadedExecutable, LoadedMemorySection
-
-from .ExecutableParser import ExecutableParser
-
 from .instructions import *

 from .MMU import MMU
-from .Registers import Registers
-from .Syscall import SyscallInterface, Syscall
-from .CPU import CPU
+from .registers import Registers
+from .syscall import SyscallInterface, Syscall
+from .CPU import CPU, UserModeCPU
+from .debug import launch_debug_session
+
+from .config import RunConfig

-from .Config import RunConfig
+from .parser import tokenize, parse_tokens, AssemblyFileLoader

 __author__ = "Anton Lydike <Anton@Lydike.com>"
-__copyright__ = "Copyright 2021 Anton Lydike"
-__version__ = '1.0.0'
+__copyright__ = "Copyright 2022 Anton Lydike"
+__version__ = '2.0.0a4'
--- a/riscemu/main.py
+++ b/riscemu/main.py
@ -5,16 +5,25 @@ SPDX-License-Identifier: MIT

 This file holds the logic for starting the emulator from the CLI
 """
+from riscemu import RiscemuBaseException, __copyright__, __version__
+from riscemu.CPU import UserModeCPU

 if __name__ == '__main__':
-    from . import *
-    from .helpers import *
+    from .config import RunConfig
    from .instructions import InstructionSetDict
+    from .colors import FMT_BOLD, FMT_MAGENTA
+    from .parser import AssemblyFileLoader
    import argparse
    import sys

    all_ins_names = list(InstructionSetDict.keys())

+    if '--version' in sys.argv:
+        print("riscemu version {}\n{}\n\nAvailable ISA: {}".format(
+            __version__, __copyright__,
+            ", ".join(InstructionSetDict.keys())
+        ))
+        sys.exit()

    class OptionStringAction(argparse.Action):
        def __init__(self, option_strings, dest, keys=None, omit_empty=False, **kwargs):
@ -64,6 +73,12 @@ if __name__ == '__main__':

    parser.add_argument('--stack_size', type=int, help='Stack size of loaded programs, defaults to 8MB', nargs='?')

+    parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count',
+                        default=0)
+
+    parser.add_argument('--interactive', help="Launch the interactive debugger instantly instead of loading any "
+                                              "programs", action='store_true')
+
    args = parser.parse_args()

    # create a RunConfig from the cli args
@ -74,7 +89,8 @@ if __name__ == '__main__':
        debug_on_exception=not args.options['fail_on_ex'],
        add_accept_imm=args.options['add_accept_imm'],
        scall_fs=args.syscall_opts['fs_access'],
-        scall_input=not args.syscall_opts['disable_input']
+        scall_input=not args.syscall_opts['disable_input'],
+        verbosity=args.verbose
    )
    for k, v in dict(cfg_dict).items():
        if v is None:
@ -93,17 +109,21 @@ if __name__ == '__main__':
    ]

    try:
-        cpu = CPU(cfg, ins_to_load)
-        loaded_exe = None
+        cpu = UserModeCPU(ins_to_load, cfg)
+
+        opts = AssemblyFileLoader.get_options(sys.argv)
        for file in args.files:
-            tk = cpu.get_tokenizer(RiscVInput.from_file(file))
-            tk.tokenize()
-            loaded_exe = cpu.load(ExecutableParser(tk).parse())
-        # run the last loaded executable
-        cpu.run_loaded(loaded_exe)
+            loader = AssemblyFileLoader.instantiate(file, opts)
+            cpu.load_program(loader.parse())
+
+        # set up a stack
+        cpu.setup_stack(cfg.stack_size)
+
+        # launch the last loaded program
+        cpu.launch(cpu.mmu.programs[-1], verbose=cfg.verbosity > 1)
+
    except RiscemuBaseException as e:
-        print("Error while parsing: {}".format(e.message()))
-        import traceback
+        print("Error: {}".format(e.message()))
+        e.print_stacktrace()

-        traceback.print_exception(type(e), e, e.__traceback__)
        sys.exit(1)
--- a/riscemu/assembler.py
+++ b/riscemu/assembler.py
@ -0,0 +1,214 @@
+from enum import Enum, auto
+from typing import List
+from typing import Optional, Tuple, Union
+
+from .colors import FMT_PARSE, FMT_NONE
+from riscemu.types.exceptions import ParseException, ASSERT_LEN
+from .helpers import parse_numeric_argument, align_addr, get_section_base_name
+from .tokenizer import Token
+from .types import Program, T_RelativeAddress, InstructionContext, Instruction, BinaryDataMemorySection, \
+    InstructionMemorySection, Int32
+
+INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini')
+"""
+A tuple containing all section names which contain executable code (instead of data)
+
+The first segment of each segment (first segment of ".text.main" is ".text") is checked
+against this list to determine the type of it.
+"""
+
+
+class MemorySectionType(Enum):
+    """The kind of content a section under construction holds."""
+
+    # the section's content is collected in a bytearray (see CurrentSection.__init__)
+    Data = auto()
+    # the section's content is collected in a list (see CurrentSection.__init__)
+    Instructions = auto()
+
+
+class CurrentSection:
+    """
+    Accumulator for the section that is currently being assembled.
+
+    Data sections collect their content in a bytearray, instruction sections
+    collect it in a list. ``base`` is added to every address reported by
+    :meth:`current_address`.
+    """
+    name: str
+    data: Union[List[Instruction], bytearray]
+    type: MemorySectionType
+    base: int
+
+    def __init__(self, name: str, type: MemorySectionType, base: int = 0):
+        self.name = name
+        self.type = type
+        self.base = base
+        if type == MemorySectionType.Instructions:
+            self.data = list()
+            return
+        if type != MemorySectionType.Data:
+            raise ParseException("Unknown section type: {}".format(type))
+        self.data = bytearray()
+
+    def current_address(self) -> T_RelativeAddress:
+        """Return the address directly behind the content collected so far."""
+        # data is counted per byte, every other entry is counted as four bytes
+        entry_size = 1 if self.type == MemorySectionType.Data else 4
+        return len(self.data) * entry_size + self.base
+
+    def __repr__(self):
+        return f"{self.__class__.__name__}(name={self.name},data={self.data},type={self.type.name})"
+
+
+class ParseContext:
+    """
+    Mutable state of a single parsing run.
+
+    Holds the program that is being built, its instruction context and the
+    section that is currently being filled (``None`` while no section is open).
+    """
+    section: Optional[CurrentSection]
+    context: InstructionContext
+    program: Program
+
+    def __init__(self, name: str):
+        self.program = Program(name)
+        self.context = self.program.context
+        self.section = None
+
+    def finalize(self) -> Program:
+        """Close the open section (if any) and return the program."""
+        self._finalize_section()
+        return self.program
+
+    def _finalize_section(self):
+        """Turn the open section into a memory section and add it to the program."""
+        current = self.section
+        if current is None:
+            return
+        if current.type == MemorySectionType.Data:
+            section_class = BinaryDataMemorySection
+        elif current.type == MemorySectionType.Instructions:
+            section_class = InstructionMemorySection
+        else:
+            # CurrentSection rejects every other type on construction, so there
+            # is nothing to add to the program in this case
+            section_class = None
+        if section_class is not None:
+            self.program.add_section(section_class(
+                current.data, current.name, self.context, self.program.name, current.base
+            ))
+        self.section = None
+
+    def new_section(self, name: str, type: MemorySectionType, alignment: int = 4):
+        """
+        Close the open section and start a new, empty one.
+
+        The new section's base is the end address of the previous section passed
+        through ``align_addr`` with ``alignment``, or 0 if there was no open section.
+        """
+        base = 0
+        if self.section is not None:
+            base = align_addr(self.section.current_address(), alignment)
+        self._finalize_section()
+        self.section = CurrentSection(name, type, base)
+
+    def add_label(self, name: str, value: int, is_global: bool = False, is_relative: bool = False):
+        """Register a label and optionally record it as global and/or relative."""
+        self.context.labels[name] = value
+        if is_global:
+            self.program.global_labels.add(name)
+        if is_relative:
+            self.program.relative_labels.add(name)
+
+    def __repr__(self):
+        # the field name used to be misspelled as "setion"
+        return "{}(\n\tsection={},\n\tprogram={}\n)".format(
+            self.__class__.__name__, self.section, self.program
+        )
+
+
+def ASSERT_IN_SECTION_TYPE(context: ParseContext, type: MemorySectionType):
+    """
+    Ensure that the parser is currently inside a section of the given type.
+
+    :raises ParseException: if no section is open or the open section has a different type
+    """
+    current = context.section
+    if current is None:
+        raise ParseException('Error, expected to be in {} section, but no section is present...'.format(type.name))
+    if current.type == type:
+        return
+    raise ParseException(
+        'Error, expected to be in {} section, but currently in {}...'.format(type.name, current)
+    )
+
+
+class AssemblerDirectives:
+    """
+    This class represents a collection of all assembler directives as documented by
+    https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md#pseudo-ops
+
+    All class methods prefixed with op_ are directly used as assembler directives.
+    Directives without an op_ method (section shorthands, string and numeric data)
+    are dispatched by :meth:`handle_instruction`.
+    """
+
+    @classmethod
+    def op_align(cls, token: Token, args: Tuple[str], context: ParseContext):
+        """
+        Pad the current data section with zero bytes up to a multiple of the argument.
+
+        :raises ParseException: if the alignment is not a positive number
+        """
+        ASSERT_LEN(args, 1)
+        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
+        align_to = parse_numeric_argument(args[0])
+        # NOTE(review): the argument is treated as a byte count here, while the manual
+        # linked above describes .align as a power-of-two alignment - confirm the intent
+        if align_to <= 0:
+            # avoids a ZeroDivisionError / negative padding size further down
+            raise ParseException('Invalid alignment "{}", must be a positive number'.format(args[0]))
+        current_mod = context.section.current_address() % align_to
+        if current_mod == 0:
+            return
+        context.section.data += bytearray(align_to - current_mod)
+
+    @classmethod
+    def op_section(cls, token: Token, args: Tuple[str], context: ParseContext):
+        """Start a new section; its base name decides between instructions and data."""
+        ASSERT_LEN(args, 1)
+        if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES:
+            context.new_section(args[0], MemorySectionType.Instructions)
+        else:
+            context.new_section(args[0], MemorySectionType.Data)
+
+    @classmethod
+    def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext):
+        """Record the given label name as global."""
+        ASSERT_LEN(args, 1)
+        context.program.global_labels.add(args[0])
+
+    @classmethod
+    def op_global(cls, token: Token, args: Tuple[str], context: ParseContext):
+        """Alias for :meth:`op_globl`."""
+        cls.op_globl(token, args, context)
+
+    @classmethod
+    def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext):
+        """Define a label (first argument) with a fixed numeric value (second argument)."""
+        ASSERT_LEN(args, 2)
+        name = args[0]
+        value = parse_numeric_argument(args[1])
+        context.context.labels[name] = value
+
+    @classmethod
+    def op_space(cls, token: Token, args: Tuple[str], context: ParseContext):
+        """Append the given number of zero bytes to the current data section."""
+        ASSERT_LEN(args, 1)
+        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
+
+        size = parse_numeric_argument(args[0])
+        cls.add_bytes(size, None, context)
+
+    @classmethod
+    def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext):
+        """Append the given number of zero bytes to the current data section."""
+        ASSERT_LEN(args, 1)
+        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
+        size = parse_numeric_argument(args[0])
+        cls.add_bytes(size, bytearray(size), context)
+
+    @classmethod
+    def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext):
+        """
+        Append content to the current data section.
+
+        :param size: number of bytes used when content is None or an int
+        :param content: None for zero bytes, an int that is converted through
+                        Int32(...).to_bytes(size), or a bytearray that is appended as is
+        :raises ParseException: if the parser is not inside a data section
+        """
+        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
+
+        if content is None:
+            content = bytearray(size)
+        if isinstance(content, int):
+            content = Int32(content).to_bytes(size)
+
+        context.section.data += content
+
+    @classmethod
+    def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True):
+        """
+        Append ASCII-encoded text to the current data section.
+
+        :param zero_terminate: append a single zero byte behind the text
+        """
+        # replace '\t' and '\n' escape sequences
+        text = text.replace('\\n', '\n').replace('\\t', '\t')
+
+        encoded_bytes = bytearray(text.encode('ascii'))
+        if zero_terminate:
+            encoded_bytes += bytearray(1)
+        cls.add_bytes(len(encoded_bytes), encoded_bytes, context)
+
+    @classmethod
+    def handle_instruction(cls, token: Token, args: Tuple[str], context: ParseContext):
+        """
+        Dispatch a directive token to its handler.
+
+        The directive name is token.value without its first character. Unknown
+        directives are reported on stdout and otherwise ignored.
+        """
+        op = token.value[1:]
+        if hasattr(cls, 'op_' + op):
+            getattr(cls, 'op_' + op)(token, args, context)
+        elif op in ('text', 'data', 'rodata', 'bss', 'sbss'):
+            cls.op_section(token, (token.value,), context)
+        elif op in ('string', 'asciiz', 'asciz', 'ascii'):
+            ASSERT_LEN(args, 1)
+            # only .ascii emits the raw text, all other variants are zero terminated
+            cls.add_text(args[0], context, zero_terminate=op != 'ascii')
+        elif op in DATA_OP_SIZES:
+            size = DATA_OP_SIZES[op]
+            for arg in args:
+                cls.add_bytes(size, parse_numeric_argument(arg), context)
+        else:
+            print(FMT_PARSE + "Unknown assembler directive: {} {} in {}".format(token, args, context) + FMT_NONE)
+
+
+# maps the name of a numeric data directive (without the leading dot) to the
+# number of bytes a single argument of that directive occupies
+DATA_OP_SIZES = {
+    directive: width
+    for width, directives in (
+        (1, ('byte',)),
+        (2, ('2byte', 'half', 'short')),
+        (4, ('4byte', 'word', 'long')),
+        (8, ('8byte', 'dword', 'quad')),
+    )
+    for directive in directives
+}
--- a/riscemu/config.py
+++ b/riscemu/config.py
@ -1,16 +1,15 @@
 """
-RiscEmu (c) 2021 Anton Lydike
+RiscEmu (c) 2021-2022 Anton Lydike

 SPDX-License-Identifier: MIT
 """

 from dataclasses import dataclass
-from typing import Optional


@dataclass(frozen=True, init=True)
 class RunConfig:
-    stack_size: int = 8 * 1024 * 64 # for 8KB stack
+    stack_size: int = 8 * 1024 * 64  # for 8KB stack
    include_scall_symbols: bool = True
    add_accept_imm: bool = False
    # debugging
@ -20,4 +19,7 @@ class RunConfig:
    scall_input: bool = True
    scall_fs: bool = False
    verbosity: int = 0
+    slowdown: float = 1

+
+CONFIG = RunConfig()
--- a/riscemu/debug.py
+++ b/riscemu/debug.py
@ -3,36 +3,37 @@ RiscEmu (c) 2021 Anton Lydike

 SPDX-License-Identifier: MIT
 """
+import os.path

-import typing
-from .Registers import Registers
-from .colors import FMT_DEBUG, FMT_NONE
-from .Executable import LoadedInstruction
+from .types import SimpleInstruction
 from .helpers import *

 if typing.TYPE_CHECKING:
-    from . import *
+    from riscemu import CPU, Registers

+HIST_FILE = os.path.join(os.path.expanduser('~'), '.riscemu_history')

-def launch_debug_session(cpu: 'CPU', mmu: 'MMU', reg: 'Registers', prompt=""):
-    if not cpu.conf.debug_instruction or cpu.active_debug:
+
+def launch_debug_session(cpu: 'CPU', prompt=""):
+    if cpu.debugger_active:
        return
    import code
    import readline
    import rlcompleter

-    cpu.active_debug = True
+    # set the active debug flag
+    cpu.debugger_active = True

    # setup some aliases:
-    registers = reg
-    regs = reg
-    memory = mmu
-    mem = mmu
-    syscall_interface = cpu.syscall_int
+    registers = cpu.regs
+    regs = cpu.regs
+    memory = cpu.mmu
+    mem = cpu.mmu
+    mmu = cpu.mmu

    # setup helper functions:
    def dump(what, *args, **kwargs):
-        if isinstance(what, Registers):
+        if what == regs:
            regs.dump(*args, **kwargs)
        else:
            mmu.dump(what, *args, **kwargs)
@ -48,22 +49,47 @@ def launch_debug_session(cpu: 'CPU', mmu: 'MMU', reg: 'Registers', prompt=""):
        if len(args) > 3:
            print("Invalid arg count!")
            return
-        bin = mmu.get_bin_containing(cpu.pc)
-
-        ins = LoadedInstruction(name, list(args), bin)
-        print(FMT_DEBUG + "Running instruction " + ins + FMT_NONE)
+        context = mmu.context_for(cpu.pc)
+
+        ins = SimpleInstruction(
+            name,
+            tuple(args),
+            context,
+            cpu.pc)
+        print(FMT_DEBUG + "Running instruction {}".format(ins) + FMT_NONE)
        cpu.run_instruction(ins)

    def cont(verbose=False):
-        cpu.continue_from_debugger(verbose)
+        try:
+            cpu.run(verbose)
+        except LaunchDebuggerException:
+            print(FMT_DEBUG + 'Returning to debugger...')
+            return

    def step():
-        cpu.step()
+        try:
+            cpu.step()
+        except LaunchDebuggerException:
+            return

+    # collect all variables
    sess_vars = globals()
    sess_vars.update(locals())

+    # add tab completion
    readline.set_completer(rlcompleter.Completer(sess_vars).complete)
    readline.parse_and_bind("tab: complete")
-    code.InteractiveConsole(sess_vars).interact(banner=FMT_DEBUG + prompt + FMT_NONE, exitmsg="Exiting debugger")
-    cpu.active_debug = False
+    if os.path.exists(HIST_FILE):
+        readline.read_history_file(HIST_FILE)
+
+    relaunch_debugger = False
+
+    try:
+        code.InteractiveConsole(sess_vars).interact(
+            banner=FMT_DEBUG + prompt + FMT_NONE,
+            exitmsg="Exiting debugger",
+        )
+    finally:
+        cpu.debugger_active = False
+        readline.write_history_file(HIST_FILE)
+
--- a/riscemu/decoder/formatter.py
+++ b/riscemu/decoder/formatter.py
@ -20,11 +20,11 @@ def format_ins(ins: int, name: str, fmt: str = 'int'):
        return f"{name} <unknown op>"

    decoder = INSTRUCTION_ARGS_DECODER[opcode]
-    if name in ('ecall', 'ebreak'):
+    if name in ('ecall', 'ebreak', 'mret', 'sret', 'uret'):
        return name
    if opcode in (0x8, 0x0):
        r1, r2, imm = decoder(ins)
-        return f"{name:<7} {r1}, {imm}({r2})"
+        return f"{name:<7} {RISCV_REGS[r1]}, {imm}({RISCV_REGS[r2]})"
    elif decoder in (decode_i, decode_i_unsigned, decode_b, decode_i_shamt, decode_s):
        r1, r2, imm = decoder(ins)
        r1, r2 = RISCV_REGS[r1], RISCV_REGS[r2]
--- a/riscemu/helpers.py
+++ b/riscemu/helpers.py
@ -5,7 +5,10 @@ SPDX-License-Identifier: MIT
 """

 from math import log10, ceil
-from .Exceptions import *
+from typing import Iterable, Iterator, TypeVar, Generic, List, Optional
+
+from .types.exceptions import *
+from .types import Int32, UInt32


 def align_addr(addr: int, to_bytes: int = 8) -> int:
@ -27,39 +30,6 @@ def parse_numeric_argument(arg: str) -> int:
        raise ParseException('Invalid immediate argument \"{}\", maybe missing symbol?'.format(arg), (arg, ex))


-def int_to_bytes(val, bytes=4, unsigned=False) -> bytearray:
-    """
-    int -> byte (two's complement)
-    """
-    if unsigned and val < 0:
-        raise NumberFormatException("unsigned negative number!")
-    return bytearray(to_unsigned(val, bytes).to_bytes(bytes, 'little'))
-
-
-def int_from_bytes(bytes, unsigned=False) -> int:
-    """
-    byte -> int (two's complement)
-    """
-    num = int.from_bytes(bytes, 'little')
-
-    if unsigned:
-        return num
-
-    return to_signed(num)
-
-
-def to_unsigned(num: int, bytes=4) -> int:
-    if num < 0:
-        return (2 ** (bytes * 8)) + num
-    return num
-
-
-def to_signed(num: int, bytes=4) -> int:
-    if num >> (bytes * 8 - 1):
-        return num - 2 ** (8 * bytes)
-    return num
-
-
 def create_chunks(my_list, chunk_size):
    """Split a list like [a,b,c,d,e,f,g,h,i,j,k,l,m] into e.g. [[a,b,c,d],[e,f,g,h],[i,j,k,l],[m]]"""
    return [my_list[i:i + chunk_size] for i in range(0, len(my_list), chunk_size)]
@ -85,10 +55,10 @@ def format_bytes(byte_arr: bytearray, fmt: str, group: int = 1, highlight: int =
        return highlight_in_list(['0x{}'.format(ch.hex()) for ch in chunks], highlight)
    if fmt == 'int':
        spc = str(ceil(log10(2 ** (group * 8 - 1))) + 1)
-        return highlight_in_list([('{:0' + spc + 'd}').format(int_from_bytes(ch)) for ch in chunks], highlight)
+        return highlight_in_list([('{:0' + spc + 'd}').format(Int32(ch)) for ch in chunks], highlight)
    if fmt == 'uint':
        spc = str(ceil(log10(2 ** (group * 8))))
-        return highlight_in_list([('{:0' + spc + 'd}').format(int_from_bytes(ch, unsigned=True)) for ch in chunks],
+        return highlight_in_list([('{:0' + spc + 'd}').format(UInt32(ch)) for ch in chunks],
                                 highlight)
    if fmt == 'ascii':
        return "".join(repr(chr(b))[1:-1] for b in byte_arr)
@ -105,3 +75,42 @@ def bind_twos_complement(val):
    elif val > 2147483647:
        return val - 4294967296
    return val
+
+
+T = TypeVar('T')
+
+
+class Peekable(Generic[T], Iterator[T]):
+    """
+    Iterator wrapper that allows looking at the next item and pushing items back.
+
+    ``self.cache`` holds the items that were peeked at or pushed back but not yet
+    consumed. Index 0 is always the item that is handed out next.
+    """
+
+    def __init__(self, iterable: Iterable[T]):
+        self.iterable = iter(iterable)
+        self.cache: List[T] = list()
+
+    def __iter__(self) -> Iterator[T]:
+        return self
+
+    def __next__(self) -> T:
+        if self.cache:
+            # take from the front so the order agrees with peek() and push_back()
+            return self.cache.pop(0)
+        return next(self.iterable)
+
+    def peek(self) -> Optional[T]:
+        """Return the next item without consuming it, or None if there is none."""
+        if self.is_empty():
+            return None
+        return self.cache[0]
+
+    def push_back(self, item: T):
+        """Make item the next one returned by peek() and next()."""
+        self.cache.insert(0, item)
+
+    def is_empty(self) -> bool:
+        """
+        Return True if no further item is available.
+
+        May fetch one item from the wrapped iterator into the cache. Unlike a
+        check of peek() against None, this is also correct for items that are None.
+        """
+        if self.cache:
+            return False
+        try:
+            self.cache.append(next(self.iterable))
+        except StopIteration:
+            return True
+        return False
+
+
+def get_section_base_name(section_name: str) -> str:
+    """
+    Return the first segment of a section name, e.g. '.text' for '.text.main'.
+
+    A name without any dot is reported on stdout and returned unchanged.
+    """
+    if '.' not in section_name:
+        print(FMT_PARSE + f"Invalid section {section_name}, not starting with a dot!" + FMT_NONE)
+        # there is no second segment to extract, indexing it would raise an IndexError
+        return section_name
+    return '.' + section_name.split('.')[1]
--- a/riscemu/instructions/RV32A.py
+++ b/riscemu/instructions/RV32A.py
@ -1,6 +1,6 @@
-from .InstructionSet import InstructionSet, LoadedInstruction
-from ..Exceptions import INS_NOT_IMPLEMENTED
-from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed
+from .instruction_set import InstructionSet, Instruction
+from riscemu.types.exceptions import INS_NOT_IMPLEMENTED
+from ..types import Int32, UInt32


 class RV32A(InstructionSet):
@ -10,69 +10,69 @@ class RV32A(InstructionSet):
    for this?
    """

-    def instruction_lr_w(self, ins: 'LoadedInstruction'):
+    def instruction_lr_w(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_sc_w(self, ins: 'LoadedInstruction'):
+    def instruction_sc_w(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_amoswap_w(self, ins: 'LoadedInstruction'):
+    def instruction_amoswap_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
        if dest == 'zero':
-            self.mmu.write(addr, int_to_bytes(addr, 4))
+            self.mmu.write(addr, val.to_bytes())
        else:
-            old = int_from_bytes(self.mmu.read(addr, 4))
-            self.mmu.write(addr, int_to_bytes(val, 4))
+            old = Int32(self.mmu.read(addr, 4))
+            self.mmu.write(addr, val.to_bytes())
            self.regs.set(dest, old)

-    def instruction_amoadd_w(self, ins: 'LoadedInstruction'):
+    def instruction_amoadd_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
-        old = int_from_bytes(self.mmu.read(addr, 4))
-        self.mmu.write(addr, int_to_bytes(old + val, 4))
+        old = Int32(self.mmu.read(addr, 4))
+        self.mmu.write(addr, (old + val).to_bytes(4))
        self.regs.set(dest, old)

-    def instruction_amoand_w(self, ins: 'LoadedInstruction'):
+    def instruction_amoand_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
-        old = int_from_bytes(self.mmu.read(addr, 4))
-        self.mmu.write(addr, int_to_bytes(old & val, 4))
+        old = Int32(self.mmu.read(addr, 4))
+        self.mmu.write(addr, (old & val).to_bytes(4))
        self.regs.set(dest, old)

-    def instruction_amoor_w(self, ins: 'LoadedInstruction'):
+    def instruction_amoor_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
-        old = int_from_bytes(self.mmu.read(addr, 4))
-        self.mmu.write(addr, int_to_bytes(old | val, 4))
+        old = Int32(self.mmu.read(addr, 4))
+        self.mmu.write(addr, (old | val).to_bytes(4))
        self.regs.set(dest, old)

-    def instruction_amoxor_w(self, ins: 'LoadedInstruction'):
+    def instruction_amoxor_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
-        old = int_from_bytes(self.mmu.read(addr, 4))
-        self.mmu.write(addr, int_to_bytes(old ^ val, 4))
+        old = Int32(self.mmu.read(addr, 4))
+        self.mmu.write(addr, (old ^ val).to_bytes(4))
        self.regs.set(dest, old)

-    def instruction_amomax_w(self, ins: 'LoadedInstruction'):
+    def instruction_amomax_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
-        old = int_from_bytes(self.mmu.read(addr, 4))
-        self.mmu.write(addr, int_to_bytes(max(old, val), 4))
+        old = Int32(self.mmu.read(addr, 4))
+        self.mmu.write(addr, max(old, val).to_bytes(4))
        self.regs.set(dest, old)

-    def instruction_amomaxu_w(self, ins: 'LoadedInstruction'):
-        dest, addr, val = self.parse_rd_rs_rs(ins)
-        val = to_unsigned(val)
-        old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True)
+    def instruction_amomaxu_w(self, ins: 'Instruction'):
+        val: UInt32
+        dest, addr, val = self.parse_rd_rs_rs(ins, signed=False)
+        old = UInt32(self.mmu.read(addr, 4))

-        self.mmu.write(addr, int_to_bytes(to_signed(max(old, val)), 4))
+        self.mmu.write(addr, max(old, val).to_bytes())
        self.regs.set(dest, old)

-    def instruction_amomin_w(self, ins: 'LoadedInstruction'):
+    def instruction_amomin_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
-        old = int_from_bytes(self.mmu.read(addr, 4))
-        self.mmu.write(addr, int_to_bytes(min(old, val), 4))
+        old = Int32(self.mmu.read(addr, 4))
+        self.mmu.write(addr, min(old, val).to_bytes(4))
        self.regs.set(dest, old)

-    def instruction_amominu_w(self, ins: 'LoadedInstruction'):
-        dest, addr, val = self.parse_rd_rs_rs(ins)
-        val = to_unsigned(val)
-        old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True)
+    def instruction_amominu_w(self, ins: 'Instruction'):
+        val: UInt32
+        dest, addr, val = self.parse_rd_rs_rs(ins, signed=False)
+        old = UInt32(self.mmu.read(addr, 4))

-        self.mmu.write(addr, int_to_bytes(to_signed(min(old, val)), 4))
+        self.mmu.write(addr, min(old, val).to_bytes(4))
        self.regs.set(dest, old)
--- a/riscemu/instructions/RV32I.py
+++ b/riscemu/instructions/RV32I.py
@ -4,14 +4,13 @@ RiscEmu (c) 2021 Anton Lydike
 SPDX-License-Identifier: MIT
 """

-from .InstructionSet import *
+from .instruction_set import *
+from ..CPU import UserModeCPU

-from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed
 from ..colors import FMT_DEBUG, FMT_NONE
-from ..debug import launch_debug_session
-from ..Exceptions import LaunchDebuggerException
-from ..Syscall import Syscall
-from ..Executable import LoadedInstruction
+from riscemu.types.exceptions import LaunchDebuggerException
+from ..syscall import Syscall
+from ..types import Instruction, Int32, UInt32


 class RV32I(InstructionSet):
@ -23,79 +22,79 @@ class RV32I(InstructionSet):
    See https://maxvytech.com/images/RV32I-11-2018.pdf for a more detailed overview
    """

-    def instruction_lb(self, ins: 'LoadedInstruction'):
+    def instruction_lb(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
-        self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1)))
+        self.regs.set(rd, Int32.sign_extend(self.mmu.read(addr.unsigned_value, 1), 8))

-    def instruction_lh(self, ins: 'LoadedInstruction'):
+    def instruction_lh(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
-        self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2)))
+        self.regs.set(rd, Int32.sign_extend(self.mmu.read(addr.unsigned_value, 2), 16))

-    def instruction_lw(self, ins: 'LoadedInstruction'):
+    def instruction_lw(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
-        self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 4)))
+        self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 4)))

-    def instruction_lbu(self, ins: 'LoadedInstruction'):
+    def instruction_lbu(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
-        self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1), unsigned=True))
+        self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 1)))

-    def instruction_lhu(self, ins: 'LoadedInstruction'):
+    def instruction_lhu(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
-        self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2), unsigned=True))
+        self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 2)))

-    def instruction_sb(self, ins: 'LoadedInstruction'):
+    def instruction_sb(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
-        self.mmu.write(addr, 1, int_to_bytes(self.regs.get(rd), 1))
+        self.mmu.write(addr.unsigned_value, 1, self.regs.get(rd).to_bytes(1))

-    def instruction_sh(self, ins: 'LoadedInstruction'):
+    def instruction_sh(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
-        self.mmu.write(addr, 2, int_to_bytes(self.regs.get(rd), 2))
+        self.mmu.write(addr.unsigned_value, 2, self.regs.get(rd).to_bytes(2))

-    def instruction_sw(self, ins: 'LoadedInstruction'):
+    def instruction_sw(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
-        self.mmu.write(addr, 4, int_to_bytes(self.regs.get(rd), 4))
+        self.mmu.write(addr.unsigned_value, 4, self.regs.get(rd).to_bytes(4))

-    def instruction_sll(self, ins: 'LoadedInstruction'):
+    def instruction_sll(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
        src2 = ins.get_reg(2)
        self.regs.set(
            dst,
-            to_signed(to_unsigned(self.regs.get(src1)) << (self.regs.get(src2) & 0b11111))
+            self.regs.get(src1) << (self.regs.get(src2) & 0b11111)
        )

-    def instruction_slli(self, ins: 'LoadedInstruction'):
+    def instruction_slli(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
        imm = ins.get_imm(2)
        self.regs.set(
            dst,
-            to_signed(to_unsigned(self.regs.get(src1)) << (imm & 0b11111))
+            self.regs.get(src1) << (imm & 0b11111)
        )

-    def instruction_srl(self, ins: 'LoadedInstruction'):
+    def instruction_srl(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
        src2 = ins.get_reg(2)
        self.regs.set(
            dst,
-            to_signed(to_unsigned(self.regs.get(src1)) >> (self.regs.get(src2) & 0b11111))
+            self.regs.get(src1).shift_right_logical(self.regs.get(src2) & 0b11111)
        )

-    def instruction_srli(self, ins: 'LoadedInstruction'):
+    def instruction_srli(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
        imm = ins.get_imm(2)
        self.regs.set(
            dst,
-            to_signed(to_unsigned(self.regs.get(src1)) >> (imm & 0b11111))
+            self.regs.get(src1).shift_right_logical(imm & 0b11111)
        )

-    def instruction_sra(self, ins: 'LoadedInstruction'):
+    def instruction_sra(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
@ -105,7 +104,7 @@ class RV32I(InstructionSet):
            self.regs.get(src1) >> (self.regs.get(src2) & 0b11111)
        )

-    def instruction_srai(self, ins: 'LoadedInstruction'):
+    def instruction_srai(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
@ -115,154 +114,148 @@ class RV32I(InstructionSet):
            self.regs.get(src1) >> (imm & 0b11111)
        )

-    def instruction_add(self, ins: 'LoadedInstruction'):
-        dst = ""
-        if self.cpu.conf.add_accept_imm:
-            try:
-                dst, rs1, rs2 = self.parse_rd_rs_imm(ins)
-            except:
-                pass
-        if not dst:
-            dst, rs1, rs2 = self.parse_rd_rs_rs(ins)
+    def instruction_add(self, ins: 'Instruction'):
+        # FIXME: once configuration is figured out, add flag to support immediate arg in add instruction
+        dst, rs1, rs2 = self.parse_rd_rs_rs(ins)

        self.regs.set(
            dst,
            rs1 + rs2
        )

-    def instruction_addi(self, ins: 'LoadedInstruction'):
+    def instruction_addi(self, ins: 'Instruction'):
        dst, rs1, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(
            dst,
            rs1 + imm
        )

-    def instruction_sub(self, ins: 'LoadedInstruction'):
+    def instruction_sub(self, ins: 'Instruction'):
        dst, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            dst,
            rs1 - rs2
        )

-    def instruction_lui(self, ins: 'LoadedInstruction'):
+    def instruction_lui(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
-        imm = ins.get_imm(1)
-        self.regs.set(reg, imm << 12)
+        imm = UInt32(ins.get_imm(1) << 12)
+        self.regs.set(reg, Int32(imm))

-    def instruction_auipc(self, ins: 'LoadedInstruction'):
+    def instruction_auipc(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
-        imm = to_unsigned(ins.get_imm(1))
-        self.regs.set(reg, self.pc + (imm << 12))
+        imm = UInt32(ins.get_imm(1) << 12)
+        self.regs.set(reg, imm.signed() + self.pc)

-    def instruction_xor(self, ins: 'LoadedInstruction'):
+    def instruction_xor(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 ^ rs2
        )

-    def instruction_xori(self, ins: 'LoadedInstruction'):
+    def instruction_xori(self, ins: 'Instruction'):
        rd, rs1, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(
            rd,
            rs1 ^ imm
        )

-    def instruction_or(self, ins: 'LoadedInstruction'):
+    def instruction_or(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 | rs2
        )

-    def instruction_ori(self, ins: 'LoadedInstruction'):
+    def instruction_ori(self, ins: 'Instruction'):
        rd, rs1, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(
            rd,
            rs1 | imm
        )

-    def instruction_and(self, ins: 'LoadedInstruction'):
+    def instruction_and(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 & rs2
        )

-    def instruction_andi(self, ins: 'LoadedInstruction'):
+    def instruction_andi(self, ins: 'Instruction'):
        rd, rs1, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(
            rd,
            rs1 & imm
        )

-    def instruction_slt(self, ins: 'LoadedInstruction'):
+    def instruction_slt(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
-            int(rs1 < rs2)
+            Int32(int(rs1 < rs2))
        )

-    def instruction_slti(self, ins: 'LoadedInstruction'):
+    def instruction_slti(self, ins: 'Instruction'):
        rd, rs1, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(
            rd,
-            int(rs1 < imm)
+            Int32(int(rs1 < imm))
        )

-    def instruction_sltu(self, ins: 'LoadedInstruction'):
+    def instruction_sltu(self, ins: 'Instruction'):
        dst, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
        self.regs.set(
            dst,
-            int(rs1 < rs2)
+            Int32(int(rs1 < rs2))
        )

-    def instruction_sltiu(self, ins: 'LoadedInstruction'):
+    def instruction_sltiu(self, ins: 'Instruction'):
        dst, rs1, imm = self.parse_rd_rs_imm(ins, signed=False)
        self.regs.set(
            dst,
-            int(rs1 < imm)
+            Int32(int(rs1 < imm))
        )

-    def instruction_beq(self, ins: 'LoadedInstruction'):
+    def instruction_beq(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 == rs2:
-            self.pc = dst
+            self.pc = dst.unsigned_value

-    def instruction_bne(self, ins: 'LoadedInstruction'):
+    def instruction_bne(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 != rs2:
-            self.pc = dst
+            self.pc = dst.unsigned_value

-    def instruction_blt(self, ins: 'LoadedInstruction'):
+    def instruction_blt(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 < rs2:
-            self.pc = dst
+            self.pc = dst.unsigned_value

-    def instruction_bge(self, ins: 'LoadedInstruction'):
+    def instruction_bge(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 >= rs2:
-            self.pc = dst
+            self.pc = dst.unsigned_value

-    def instruction_bltu(self, ins: 'LoadedInstruction'):
+    def instruction_bltu(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
        if rs1 < rs2:
-            self.pc = dst
+            self.pc = dst.unsigned_value

-    def instruction_bgeu(self, ins: 'LoadedInstruction'):
+    def instruction_bgeu(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
        if rs1 >= rs2:
-            self.pc = dst
+            self.pc = dst.unsigned_value

    # technically deprecated
-    def instruction_j(self, ins: 'LoadedInstruction'):
+    def instruction_j(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 1)
        addr = ins.get_imm(0)
        self.pc = addr

-    def instruction_jal(self, ins: 'LoadedInstruction'):
+    def instruction_jal(self, ins: 'Instruction'):
        reg = 'ra'  # default register is ra
        if len(ins.args) == 1:
            addr = ins.get_imm(0)
@ -270,60 +263,59 @@ class RV32I(InstructionSet):
            ASSERT_LEN(ins.args, 2)
            reg = ins.get_reg(0)
            addr = ins.get_imm(1)
-        self.regs.set(reg, self.pc)
+        self.regs.set(reg, Int32(self.pc))
        self.pc = addr

-    def instruction_jalr(self, ins: 'LoadedInstruction'):
+    def instruction_jalr(self, ins: 'Instruction'):
+        """
+        Jump and link with exactly two arguments (reg, imm): the current pc is stored in reg (wrapped in
+        Int32) and execution continues at the immediate
+        """
+        # NOTE(review): the immediate alone is used as the jump target, no base register is added here -
+        # confirm this is the intended argument format
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
        addr = ins.get_imm(1)
-        self.regs.set(reg, self.pc)
+        self.regs.set(reg, Int32(self.pc))
        self.pc = addr

-    def instruction_ret(self, ins: 'LoadedInstruction'):
+    def instruction_ret(self, ins: 'Instruction'):
+        """
+        Return: takes no arguments and continues execution at the address held in register ra
+        """
        ASSERT_LEN(ins.args, 0)
-        self.pc = self.regs.get('ra')
+        # NOTE(review): the branch handlers use .unsigned_value for pc while this uses .value - confirm the
+        # difference is intended
+        self.pc = self.regs.get('ra').value

-    def instruction_ecall(self, ins: 'LoadedInstruction'):
+    def instruction_ecall(self, ins: 'Instruction'):
+        """
+        Alias: forwards the instruction unchanged to instruction_scall
+        """
        self.instruction_scall(ins)

-    def instruction_ebreak(self, ins: 'LoadedInstruction'):
+    def instruction_ebreak(self, ins: 'Instruction'):
+        """
+        Alias: forwards the instruction unchanged to instruction_sbreak
+        """
        self.instruction_sbreak(ins)

-    def instruction_scall(self, ins: 'LoadedInstruction'):
+    def instruction_scall(self, ins: 'Instruction'):
+        """
+        Perform a system call: a Syscall is built from the value in register a7 and the executing cpu, and
+        is handed to the cpu's syscall interface
+
+        :raises RuntimeError: if the executing cpu is not a UserModeCPU
+        """
        ASSERT_LEN(ins.args, 0)
+
+        if not isinstance(self.cpu, UserModeCPU):
+            # FIXME: add exception for syscall not supported or something
+            # A bare `raise` without an active exception only produces the unrelated message
+            # "No active exception to reraise", so the same exception type is raised with a message that
+            # names the actual problem.
+            raise RuntimeError(
+                "Syscalls are not supported on {}, a UserModeCPU is required".format(type(self.cpu).__name__)
+            )
+
        syscall = Syscall(self.regs.get('a7'), self.cpu)
        self.cpu.syscall_int.handle_syscall(syscall)

-    def instruction_sbreak(self, ins: 'LoadedInstruction'):
+    def instruction_sbreak(self, ins: 'Instruction'):
+        """
+        Breakpoint: takes no arguments, prints a notice and raises LaunchDebuggerException

+        NOTE(review): the notice reports pc - 1 as the location - confirm this matches the address of the
+        executed instruction
+        """
        ASSERT_LEN(ins.args, 0)
-        if self.cpu.active_debug:
-            print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE)
-            raise LaunchDebuggerException()
-        launch_debug_session(
-            self.cpu,
-            self.mmu,
-            self.regs,
-            "Debug instruction encountered at 0x{:08X}".format(self.pc - 1)
-        )

-    def instruction_nop(self, ins: 'LoadedInstruction'):
+        print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE)
+        raise LaunchDebuggerException()
+
+    def instruction_nop(self, ins: 'Instruction'):
+        """
+        No operation: only verifies that no arguments were given
+        """
        ASSERT_LEN(ins.args, 0)
        pass

-    def instruction_li(self, ins: 'LoadedInstruction'):
+    def instruction_li(self, ins: 'Instruction'):
+        """
+        Load immediate: takes (reg, imm) and stores the immediate, wrapped in Int32, in reg
+        """
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
        immediate = ins.get_imm(1)
-        self.regs.set(reg, immediate)
+        self.regs.set(reg, Int32(immediate))

-    def instruction_la(self, ins: 'LoadedInstruction'):
+    def instruction_la(self, ins: 'Instruction'):
+        """
+        Load address: takes (reg, imm) and stores the immediate, wrapped in Int32, in reg
+        (the implementation is identical to instruction_li)
+        """
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
        immediate = ins.get_imm(1)
-        self.regs.set(reg, immediate)
+        self.regs.set(reg, Int32(immediate))

-    def instruction_mv(self, ins: 'LoadedInstruction'):
+    def instruction_mv(self, ins: 'Instruction'):
+        """
+        Move: takes (rd, rs) and copies the value of register rs into register rd
+        """
        ASSERT_LEN(ins.args, 2)
        rd, rs = ins.get_reg(0), ins.get_reg(1)
        self.regs.set(rd, self.regs.get(rs))
--- a/riscemu/instructions/RV32M.py
+++ b/riscemu/instructions/RV32M.py
@ -4,56 +4,56 @@ RiscEmu (c) 2021 Anton Lydike
 SPDX-License-Identifier: MIT
 """

-from .InstructionSet import *
-from ..Exceptions import INS_NOT_IMPLEMENTED
+from .instruction_set import *
+from riscemu.types.exceptions import INS_NOT_IMPLEMENTED


 class RV32M(InstructionSet):
    """
    The RV32M Instruction set, containing multiplication and division instructions
    """
-    def instruction_mul(self, ins: 'LoadedInstruction'):
+    def instruction_mul(self, ins: 'Instruction'):
+        """
+        rd = rs1 * rs2
+        """
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 * rs2
        )

-    def instruction_mulh(self, ins: 'LoadedInstruction'):
+    def instruction_mulh(self, ins: 'Instruction'):
+        """
+        rd = (rs1 * rs2) >> 32
+        """
+        # NOTE(review): if the register type's multiplication already wraps the product to 32 bits, the
+        # shift leaves only sign bits instead of the upper word of the product - confirm
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            (rs1 * rs2) >> 32
        )

-    def instruction_mulhsu(self, ins: 'LoadedInstruction'):
+    def instruction_mulhsu(self, ins: 'Instruction'):
+        """
+        Not implemented: delegates to INS_NOT_IMPLEMENTED
+        """
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_mulhu(self, ins: 'LoadedInstruction'):
+    def instruction_mulhu(self, ins: 'Instruction'):
+        """
+        Not implemented: delegates to INS_NOT_IMPLEMENTED
+        """
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_div(self, ins: 'LoadedInstruction'):
+    def instruction_div(self, ins: 'Instruction'):
+        """
+        rd = rs1 // rs2
+        """
+        # NOTE(review): RISC-V div rounds towards zero and defines a result for a zero divisor, while
+        # Python's // on plain ints floors and raises on zero - confirm how the register type handles both
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 // rs2
        )

-    def instruction_divu(self, ins: 'LoadedInstruction'):
+    def instruction_divu(self, ins: 'Instruction'):
+        """
+        rd = rs1 // rs2, with both operands requested as unsigned values (signed=False)
+        """
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
        self.regs.set(
            rd,
            rs1 // rs2
        )

-    def instruction_rem(self, ins: 'LoadedInstruction'):
+    def instruction_rem(self, ins: 'Instruction'):
+        """
+        rd = rs1 % rs2
+        """
+        # NOTE(review): RISC-V rem takes the sign of the dividend, Python's % on plain ints takes the sign
+        # of the divisor - confirm how the register type implements %
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 % rs2
        )

-    def instruction_remu(self, ins: 'LoadedInstruction'):
+    def instruction_remu(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
        self.regs.set(
            rd,
--- a/riscemu/instructions/init.py
+++ b/riscemu/instructions/init.py
@ -6,7 +6,7 @@ SPDX-License-Identifier: MIT
 This package holds all instruction sets, available to the processor
 """

-from .InstructionSet import InstructionSet
+from .instruction_set import InstructionSet
 from .RV32M import RV32M
 from .RV32I import RV32I
 from .RV32A import RV32A
--- a/riscemu/instructions/instruction_set.py
+++ b/riscemu/instructions/instruction_set.py
@ -8,8 +8,8 @@ from typing import Tuple, Callable, Dict

 from abc import ABC
 from ..CPU import CPU
-from ..helpers import ASSERT_LEN, ASSERT_IN, to_unsigned
-from ..Executable import LoadedInstruction
+from riscemu.types.exceptions import ASSERT_LEN, ASSERT_IN
+from ..types import Instruction, Int32, UInt32


 class InstructionSet(ABC):
@ -30,7 +30,7 @@ class InstructionSet(ABC):
        self.name = self.__class__.__name__
        self.cpu = cpu

-    def load(self) -> Dict[str, Callable[['LoadedInstruction'], None]]:
+    def load(self) -> Dict[str, Callable[['Instruction'], None]]:
        """
        This is called by the CPU once it instantiates this instruction set

@ -51,7 +51,7 @@ class InstructionSet(ABC):
            if member.startswith('instruction_'):
                yield member[12:].replace('_', '.'), getattr(self, member)

-    def parse_mem_ins(self, ins: 'LoadedInstruction') -> Tuple[str, int]:
+    def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, Int32]:
        """
        parses both rd, rs, imm and rd, imm(rs) argument format and returns (rd, imm+rs1)
        (so a register and address tuple for memory instructions)
@ -69,7 +69,7 @@ class InstructionSet(ABC):
        rd = ins.get_reg(0)
        return rd, rs + imm

-    def parse_rd_rs_rs(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]:
+    def parse_rd_rs_rs(self, ins: 'Instruction', signed=True) -> Tuple[str, Int32, Int32]:
        """
        Assumes the command is in <name> rd, rs1, rs2 format
        Returns the name of rd, and the values in rs1 and rs2
@ -81,10 +81,10 @@ class InstructionSet(ABC):
                   self.get_reg_content(ins, 2)
        else:
            return ins.get_reg(0), \
-                   to_unsigned(self.get_reg_content(ins, 1)), \
-                   to_unsigned(self.get_reg_content(ins, 2))
+                   UInt32(self.get_reg_content(ins, 1)), \
+                   UInt32(self.get_reg_content(ins, 2))

-    def parse_rd_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]:
+    def parse_rd_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[str, Int32, Int32]:
        """
        Assumes the command is in <name> rd, rs, imm format
        Returns the name of rd, the value in rs and the immediate imm
@ -92,28 +92,28 @@ class InstructionSet(ABC):
        ASSERT_LEN(ins.args, 3)
        if signed:
            return ins.get_reg(0), \
-                   self.get_reg_content(ins, 1), \
-                   ins.get_imm(2)
+                   Int32(self.get_reg_content(ins, 1)), \
+                   Int32(ins.get_imm(2))
        else:
            return ins.get_reg(0), \
-                   to_unsigned(self.get_reg_content(ins, 1)), \
-                   to_unsigned(ins.get_imm(2))
+                   UInt32(self.get_reg_content(ins, 1)), \
+                   UInt32(ins.get_imm(2))

-    def parse_rs_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[int, int, int]:
+    def parse_rs_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[Int32, Int32, Int32]:
        """
        Assumes the command is in <name> rs1, rs2, imm format
        Returns the values in rs1, rs2 and the immediate imm
+
+        All three values are wrapped in Int32 if signed is True, and in UInt32 otherwise
        """
+        # validate the argument count up front, like parse_rd_rs_imm does
+        ASSERT_LEN(ins.args, 3)
        if signed:
-            return self.get_reg_content(ins, 0), \
-                   self.get_reg_content(ins, 1), \
-                   ins.get_imm(2)
+            return Int32(self.get_reg_content(ins, 0)), \
+                   Int32(self.get_reg_content(ins, 1)), \
+                   Int32(ins.get_imm(2))
        else:
-            return to_unsigned(self.get_reg_content(ins, 0)), \
-                   to_unsigned(self.get_reg_content(ins, 1)), \
-                   to_unsigned(ins.get_imm(2))
+            return UInt32(self.get_reg_content(ins, 0)), \
+                   UInt32(self.get_reg_content(ins, 1)), \
+                   UInt32(ins.get_imm(2))

-    def get_reg_content(self, ins: 'LoadedInstruction', ind: int) -> int:
+    def get_reg_content(self, ins: 'Instruction', ind: int) -> Int32:
        """
        get the register name from ins and then return the register contents
        """
--- a/riscemu/interactive.py
+++ b/riscemu/interactive.py
@ -0,0 +1,24 @@
+from riscemu import RunConfig
+from riscemu.types import InstructionMemorySection, SimpleInstruction, Program
+
+# Interactive entry point: builds a four instruction program (ebreak, a0 = 0, a7 = 93, scall) and launches
+# it on a UserModeCPU that has every instruction set from InstructionSetDict loaded.
+# NOTE(review): the relative imports below only resolve when this file is executed as part of the package
+# (e.g. through `python -m`) - confirm that this is the intended way to start it.
+if __name__ == '__main__':
+    from .CPU import UserModeCPU
+    from .instructions import InstructionSetDict
+    from .debug import launch_debug_session
+
+    cpu = UserModeCPU(list(InstructionSetDict.values()), RunConfig(verbosity=4))
+
+    program = Program('interactive session', base=0x100)
+    context = program.context
+    # the instruction addresses are spelled out by hand, starting at the program base in steps of four
+    program.add_section(InstructionMemorySection([
+        SimpleInstruction('ebreak', (), context, 0x100),
+        SimpleInstruction('addi', ('a0', 'zero', '0'), context, 0x104),
+        # presumably 93 is the number of the exit syscall - verify against the syscall table
+        SimpleInstruction('addi', ('a7', 'zero', '93'), context, 0x108),
+        SimpleInstruction('scall', (), context, 0x10C),
+    ], '.text', context, program.name, 0x100))
+
+    cpu.load_program(program)
+
+    cpu.setup_stack()
+
+    cpu.launch(program)
--- a/riscemu/parser.py
+++ b/riscemu/parser.py
@ -0,0 +1,127 @@
+"""
+RiscEmu (c) 2021 Anton Lydike
+
+SPDX-License-Identifier: MIT
+"""
+import re
+from typing import Dict, Tuple, Iterable, Callable, List
+
+from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
+from .colors import FMT_PARSE
+from .helpers import Peekable
+from .tokenizer import Token, TokenType, tokenize
+from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction
+from .types.exceptions import ParseException
+
+
+def parse_instruction(token: Token, args: Tuple[str], context: ParseContext):
+    """
+    Append the instruction named by token, together with its arguments, to the section that is currently
+    being assembled
+
+    :raises ParseException: if there is no current section or it does not hold instructions
+    """
+    section = context.section
+    in_instruction_section = section is not None and section.type == MemorySectionType.Instructions
+    if not in_instruction_section:
+        raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context))
+    section.data.append(
+        SimpleInstruction(token.value, args, context.context, section.current_address())
+    )
+
+
+def parse_label(token: Token, args: Tuple[str], context: ParseContext):
+    """
+    Register the label named by token (without its last character) at the current address of the
+    current section
+
+    Names consisting only of digits are collected in numbered_labels, every other name is added through
+    context.add_label. A warning is printed if the name is already present in the labels.
+
+    :raises ParseException: if the label appears before any section exists
+    """
+    if context.section is None:
+        # without a section there is no address to attach the label to; fail with a parse error like
+        # parse_instruction does, instead of an AttributeError on None
+        raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context))
+    name = token.value[:-1]
+    if re.match(r'^\d+$', name):
+        # relative label:
+        context.context.numbered_labels[name].append(context.section.current_address())
+    else:
+        if name in context.context.labels:
+            print(FMT_PARSE + 'Warn: Symbol {} defined twice!'.format(name))
+        context.add_label(name, context.section.current_address(), is_relative=True)
+
+
+# Dispatch table used by parse_tokens: maps the type of the first token of a line to the function that
+# handles the line. Every handler is called with (token, args, context).
+PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {
+    TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction,
+    TokenType.LABEL: parse_label,
+    TokenType.INSTRUCTION_NAME: parse_instruction
+}
+
+
+def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
+    """
+    Convert a token stream into a parsed program
+
+    Every (token, args) group produced by composite_tokenizer is passed to the handler registered for its
+    token type in PARSERS. The context built up this way is finalized into the returned program.
+
+    :param name: the programs name
+    :param tokens_iter: the programs content, tokenized
+    :return: a parsed program
+    :raises ParseException: if a group starts with a token type that has no registered handler
+    """
+    context = ParseContext(name)
+
+    # composite_tokenizer wraps its input in a Peekable itself, wrapping it here as well would only stack
+    # a second look-ahead layer on top
+    for token, args in composite_tokenizer(tokens_iter):
+        handler = PARSERS.get(token.type)
+        if handler is None:
+            raise ParseException("Unexpected token type: {}, {}".format(token, args))
+        handler(token, args, context)
+
+    return context.finalize()
+
+
+def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]:
+    """
+    Group a flat token stream into (token, arguments) tuples
+
+    The token is a pseudo op, a label or an instruction name. The arguments are the values collected by
+    take_arguments from the tokens that follow it. Tokens of any other type that appear where a group
+    should start are skipped.
+    :param tokens_iter: An iterator over tokens
+    :return: An iterator over a slightly more structured representation of the tokens
+    """
+    group_starts = (TokenType.PSEUDO_OP, TokenType.LABEL, TokenType.INSTRUCTION_NAME)
+    tokens: Peekable[Token] = Peekable[Token](tokens_iter)
+
+    while not tokens.is_empty():
+        head = next(tokens)
+        if head.type not in group_starts:
+            continue
+        yield head, tuple(take_arguments(tokens))
+
+
+def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
+    """
+    Consumes arguments and commas and yields argument.value for every argument
+
+    Consumption stops after a newline (which is consumed as well), in front of the first token that is
+    neither an argument, a comma nor a newline (which is left in the stream), or when the stream runs out
+    of tokens. Commas are skipped without any checks, so no exception is raised for missing or repeated
+    separators.
+    :param tokens: A Peekable iterator over some Tokens
+    """
+    # stop at the end of the stream instead of peeking into an exhausted iterator, this happens if the
+    # last line is not terminated by a newline token
+    while not tokens.is_empty():
+        token_type = tokens.peek().type
+        if token_type == TokenType.ARGUMENT:
+            yield next(tokens).value
+        elif token_type == TokenType.NEWLINE:
+            next(tokens)
+            break
+        elif token_type == TokenType.COMMA:
+            next(tokens)
+        else:
+            break
+
+
+class AssemblyFileLoader(ProgramLoader):
+    """
+    This class loads assembly files written by hand. It understands some assembler directives and supports most
+    pseudo instructions. It does very little verification of source correctness.
+
+    It also supports numbered jump targets and properly supports local and global scope (.globl assembly directive)
+
+
+    The AssemblyFileLoader loads .asm, .S and .s files by default, and acts as a weak fallback to all other filetypes.
+    """
+    def parse(self) -> Program:
+        """
+        Tokenize the file at self.source_path and parse the tokens into a program named self.filename
+        """
+        with open(self.source_path, 'r') as f:
+            return parse_tokens(self.filename, tokenize(f))
+
+    @classmethod
+    def can_parse(cls, source_path: str) -> float:
+        """
+        Rate how well this loader fits the given file
+
+        It also acts as a weak fallback if no other loaders want to take the file.
+
+        :param source_path: the path to the source file
+        :return: 1 if the text after the last dot is asm, S or s, and 0.01 otherwise
+        """
+        # gcc recognizes these file extensions as assembly. So we will do too.
+        if source_path.split('.')[-1] in ('asm', 'S', 's'):
+            return 1
+        return 0.01
+
+    @classmethod
+    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
+        """
+        This loader takes no options: argv is returned untouched together with an empty options dict
+        """
+        # typing.List / typing.Tuple are used because annotations are evaluated when the function is
+        # defined and list[str] is only subscriptable from python 3.9 on
+        return argv, {}
--- a/riscemu/priv/CSR.py
+++ b/riscemu/priv/CSR.py
@ -2,49 +2,49 @@ from typing import Dict, Union, Callable, Optional
 from collections import defaultdict
 from .privmodes import PrivModes
 from .Exceptions import InstructionAccessFault
-from ..helpers import to_signed
 from ..colors import FMT_CSR, FMT_NONE

 from .CSRConsts import CSR_NAME_TO_ADDR, MSTATUS_LEN_2, MSTATUS_OFFSETS
+from ..types import UInt32


 class CSR:
    """
    This holds all Control and Status Registers (CSR)
    """
-    regs: Dict[int, int]
+    regs: Dict[int, UInt32]
    """
    All Control and Status Registers are stored here
    """

-    virtual_regs: Dict[int, Callable[[], int]]
+    virtual_regs: Dict[int, Callable[[], UInt32]]
    """
    list of virtual CSR registers, with values computed on read
    """

-    listeners: Dict[int, Callable[[int, int], None]]
+    listeners: Dict[int, Callable[[UInt32, UInt32], None]]

-    mstatus_cache: Dict[str, int]
+    mstatus_cache: Dict[str, UInt32]
    mstatus_cache_dirty = True

    def __init__(self):
+        """
+        Create an empty register file: unset registers read as UInt32(0), addresses without a registered
+        listener get a listener that does nothing, and no virtual registers or cached mstatus fields exist
+        """
-        self.regs = defaultdict(lambda: 0)
+        self.regs = defaultdict(lambda: UInt32(0))
        self.listeners = defaultdict(lambda: (lambda x, y: None))
        self.virtual_regs = dict()
        self.mstatus_cache = dict()
        # TODO: implement write masks (bitmasks which control writeable bits in registers

-    def set(self, addr: Union[str, int], val: int):
+    def set(self, addr: Union[str, int], val: Union[int, UInt32]):
+        """
+        Write val to the register at addr (given as name or as numeric address)
+
+        The value is converted to UInt32, then the listener for the address is called with
+        (current value, new value) and only after that the register is updated.
+        Nothing happens if _name_to_addr resolves addr to None.
+        """
        addr = self._name_to_addr(addr)
        if addr is None:
            return
-        val = to_signed(val)
+        val = UInt32(val)
        self.listeners[addr](self.regs[addr], val)
        if addr == 0x300:
+            # presumably 0x300 is the address of mstatus: a write invalidates the cached mstatus fields
            self.mstatus_cache_dirty = True
        self.regs[addr] = val

-    def get(self, addr: Union[str, int]) -> int:
+    def get(self, addr: Union[str, int]) -> UInt32:
        addr = self._name_to_addr(addr)
        if addr is None:
            raise RuntimeError(f"Invalid CSR name: {addr}!")
@ -52,7 +52,7 @@ class CSR:
            return self.virtual_regs[addr]()
        return self.regs[addr]

-    def set_listener(self, addr: Union[str, int], listener: Callable[[int, int], None]):
+    def set_listener(self, addr: Union[str, int], listener: Callable[[UInt32, UInt32], None]):
        addr = self._name_to_addr(addr)
        if addr is None:
            print("unknown csr address name: {}".format(addr))
@ -60,7 +60,7 @@ class CSR:
        self.listeners[addr] = listener

    # mstatus properties
-    def set_mstatus(self, name: str, val: int):
+    def set_mstatus(self, name: str, val: UInt32):
        """
        Set mstatus bits using this helper. mstatus is a 32 bit register, holding various machine status flags
        Setting them by hand is super painful, so this helper allows you to set specific bits.
@ -79,7 +79,7 @@ class CSR:
        new_val = erased | (val << off)
        self.set('mstatus', new_val)

-    def get_mstatus(self, name) -> int:
+    def get_mstatus(self, name) -> UInt32:
        if not self.mstatus_cache_dirty and name in self.mstatus_cache:
            return self.mstatus_cache[name]

@ -94,7 +94,7 @@ class CSR:
        return val

    def callback(self, addr: Union[str, int]):
-        def inner(func: Callable[[int, int], None]):
+        def inner(func: Callable[[UInt32, UInt32], None]):
            self.set_listener(addr, func)
            return func

@ -121,7 +121,7 @@ class CSR:
        if addr is None:
            print("unknown csr address name: {}".format(addr))

-        def inner(func: Callable[[], int]):
+        def inner(func: Callable[[], UInt32]):
            self.virtual_regs[addr] = func
            return func

--- a/riscemu/priv/ElfLoader.py
+++ b/riscemu/priv/ElfLoader.py
@ -1,11 +1,9 @@
-from dataclasses import dataclass
-from typing import List, Dict, Tuple
+from typing import List

 from .Exceptions import *
-from ..Exceptions import RiscemuBaseException
-from ..Executable import MemoryFlags, LoadedMemorySection
-from ..decoder import decode, RISCV_REGS, format_ins
+from .types import ElfMemorySection
 from ..helpers import FMT_PARSE, FMT_NONE, FMT_GREEN, FMT_BOLD
+from ..types import MemoryFlags, Program, ProgramLoader, T_ParserOpts

 FMT_ELF = FMT_GREEN + FMT_BOLD

@ -13,41 +11,53 @@ if typing.TYPE_CHECKING:
    from elftools.elf.elffile import ELFFile
    from elftools.elf.sections import Section, SymbolTableSection

-# This requires pyelftools package!
-
 INCLUDE_SEC = ('.text', '.stack', '.bss', '.sdata', '.sbss')


-class ElfExecutable:
-    sections: List['ElfLoadedMemorySection']
-    sections_by_name: Dict[str, 'ElfLoadedMemorySection']
-    symbols: Dict[str, int]
-    run_ptr: int
+class ElfBinaryFileLoader(ProgramLoader):
+    """
+    Loads compiled elf binaries (checks for the magic sequence 7f45 4c46)
+
+    This loader respects local and global symbols.
+    """
+    program: Program
+
+    def __init__(self, source_path: str, options: T_ParserOpts):
+        """
+        Set up the loader and create the (still empty) program, named after self.filename, that parse()
+        fills and returns
+        """
+        super().__init__(source_path, options)
+        self.program = Program(self.filename)
+
+    @classmethod
+    def can_parse(cls, source_path: str) -> float:
+        """
+        Return 1 if the first four bytes of the file are the elf magic sequence, and 0 otherwise
+        """
+        with open(source_path, 'rb') as f:
+            magic = f.read(4)
+        return 1 if magic == b'\x7f\x45\x4c\x46' else 0

-    def __init__(self, name: str):
-        self.sections = list()
-        self.sections_by_name = dict()
-        self.symbols = dict()
+    @classmethod
+    def get_options(cls, argv: List[str]) -> typing.Tuple[List[str], T_ParserOpts]:
+        """
+        This loader takes no options: argv is returned untouched together with an empty options dict
+        """
+        # typing generics are used because annotations are evaluated when the function is defined and
+        # list[str] is only subscriptable from python 3.9 on
+        return argv, {}

+    def parse(self) -> Program:
+        """
+        Read the elf file at self.source_path into self.program and return it
+
+        :raises ImportError: if the pyelftools package is not available (a hint is printed before the
+                             error is passed on)
+        """
        try:
+            # pyelftools is imported here, so the package is only needed once an elf file is loaded
            from elftools.elf.elffile import ELFFile
            from elftools.elf.sections import Section, SymbolTableSection

-            with open(name, 'rb') as f:
-                print(FMT_ELF + "[ElfLoader] Loading elf executable from: {}".format(name) + FMT_NONE)
+            with open(self.source_path, 'rb') as f:
+                print(FMT_ELF + "[ElfLoader] Loading elf executable from: {}".format(self.source_path) + FMT_NONE)
                self._read_elf(ELFFile(f))
        except ImportError as e:
-            print(FMT_PARSE + "[ElfLoader] Cannot load elf files without PyElfTools package! You can install them using pip install pyelftools!" + FMT_NONE)
+            print(FMT_PARSE + "[ElfLoader] Cannot load elf files without PyElfTools package! You can install them "
+                              "using pip install pyelftools!" + FMT_NONE)
            raise e

+        return self.program
+
    def _read_elf(self, elf: 'ELFFile'):
        if not elf.header.e_machine == 'EM_RISCV':
            raise InvalidElfException("Not a RISC-V elf file!")
        if not elf.header.e_ident.EI_CLASS == 'ELFCLASS32':
            raise InvalidElfException("Only 32bit executables are supported!")

-        self.run_ptr = elf.header.e_entry
-
        from elftools.elf.sections import SymbolTableSection
        for sec in elf.iter_sections():
            if isinstance(sec, SymbolTableSection):
@ -57,29 +67,33 @@ class ElfExecutable:
            if sec.name not in INCLUDE_SEC:
                continue

-            self.add_sec(self._lms_from_elf_sec(sec, 'kernel'))
+            self._add_sec(self._lms_from_elf_sec(sec, self.filename))

    def _lms_from_elf_sec(self, sec: 'Section', owner: str):
+        """
+        Build an ElfMemorySection from an elf section

+        The section content is copied and zero padded up to sec.data_size if it is shorter. Both memory
+        flags are set exactly for the .text section.

+        :param sec: the elf section to convert
+        :param owner: passed on to the created memory section as its owner
+        """
        is_code = sec.name in ('.text',)
        data = bytearray(sec.data())
+        if len(data) < sec.data_size:
+            # pad with the number of missing bytes; the reversed difference is negative inside this
+            # branch and bytearray rejects a negative size with a ValueError
+            data += bytearray(sec.data_size - len(data))
        flags = MemoryFlags(is_code, is_code)
        print(FMT_ELF + "[ElfLoader] Section {} at: {:X}".format(sec.name, sec.header.sh_addr) + FMT_NONE)
-        return ElfLoadedMemorySection(
-            sec.name,
-            sec.header.sh_addr,
-            sec.data_size,
-            data,
-            flags,
-            owner
+        return ElfMemorySection(
+            data, sec.name, self.program.context, owner, sec.header.sh_addr, flags
        )

    def _parse_symtab(self, symtab: 'SymbolTableSection'):
-        self.symbols = {
-            sym.name: sym.entry.st_value for sym in symtab.iter_symbols() if sym.name
-        }
+        """
+        Register every named symbol of the symbol table as a label of the program

+        Symbols without a name are skipped. Symbols with global binding are additionally recorded in
+        program.global_labels and reported on stdout.
+        """

-    def add_sec(self, new_sec: 'ElfLoadedMemorySection'):
-        for sec in self.sections:
+        for sym in symtab.iter_symbols():
+            if not sym.name:
+                continue
+            self.program.context.labels[sym.name] = sym.entry.st_value
+            # the binding of the symbol (not its visibility) decides if it is exported as global label
+            if sym.entry.st_info.bind == 'STB_GLOBAL':
+                self.program.global_labels.add(sym.name)
+                print(FMT_PARSE + "LOADED GLOBAL SYMBOL {}: {}".format(sym.name, sym.entry.st_value) + FMT_NONE)
+
+    def _add_sec(self, new_sec: 'ElfMemorySection'):
+        for sec in self.program.sections:
            if sec.base < sec.end <= new_sec.base or sec.end > sec.base >= new_sec.end:
                continue
            else:
@ -88,78 +102,4 @@ class ElfExecutable:
                ) + FMT_NONE)
                raise RuntimeError("Cannot load elf with overlapping sections!")

-        self.sections.append(new_sec)
-        self.sections_by_name[new_sec.name] = new_sec
-
-
-class InvalidElfException(RiscemuBaseException):
-    def __init__(self, msg: str):
-        super().__init__()
-        self.msg = msg
-
-    def message(self):
-        return FMT_PARSE + "{}(\"{}\")".format(self.__class__.__name__, self.msg) + FMT_NONE
-
-
-@dataclass(frozen=True)
-class ElfInstruction:
-    name: str
-    args: List[int]
-    encoded: int
-
-    def get_imm(self, num: int) -> int:
-        return self.args[num]
-
-    def get_imm_reg(self, num: int) -> Tuple[int, int]:
-        return self.args[-1], self.args[-2]
-
-    def get_reg(self, num: int) -> str:
-        return RISCV_REGS[self.args[num]]
-
-    def __repr__(self) -> str:
-        if self.name == 'jal' and self.args[0] == 0:
-            return "j       {}".format(self.args[1])
-        if self.name == 'addi' and self.args[2] == 0:
-            return "mv      {}, {}".format(self.get_reg(0), self.get_reg(1))
-        if self.name == 'addi' and self.args[1] == 0:
-            return "li      {}, {}".format(self.get_reg(0), self.args[2])
-        if self.name == 'ret' and len(self.args) == 0:
-            return "ret"
-        return format_ins(self.encoded, self.name)
-        # if self.name in ('lw', 'lh', 'lb', 'lbu', 'lhu', 'sw', 'sh', 'sb'):
-        #     args = "{}, {}({})".format(
-        #         RISCV_REGS[self.args[0]], self.args[2], RISCV_REGS[self.args[1]]
-        #     )
-        # else:
-        #     args = ", ".join(map(str, self.args))
-        # return "{:<8} {}".format(
-        #     self.name,
-        #     args
-        # )
-
-
-class ElfLoadedMemorySection(LoadedMemorySection):
-    ins_cache: List[Optional[ElfInstruction]]
-    """
-    A fast cache for accessing pre-decoded instructions
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.__setattr__('ins_cache', [None] * (self.size // 4))
-
-    def read_instruction(self, offset):
-        if self.ins_cache[offset//4] is not None:
-            return self.ins_cache[offset//4]
-        if not self.flags.executable:
-            print(FMT_PARSE + "Reading instruction from non-executable memory!" + FMT_NONE)
-            raise InstructionAccessFault(offset + self.base)
-        if offset % 4 != 0:
-            raise InstructionAddressMisalignedTrap(offset + self.base)
-        ins = ElfInstruction(*decode(self.content[offset:offset + 4]))
-        self.ins_cache[offset // 4] = ins
-        return ins
-
-    @property
-    def end(self):
-        return self.size + self.base
+        self.program.add_section(new_sec)
--- a/riscemu/priv/Exceptions.py
+++ b/riscemu/priv/Exceptions.py
@ -5,6 +5,10 @@ from .CSRConsts import MCAUSE_TRANSLATION

 import typing

+from .. import RiscemuBaseException
+from ..colors import FMT_PARSE, FMT_NONE
+from ..types import UInt32
+
 if typing.TYPE_CHECKING:
    from .ElfLoader import ElfInstruction

@ -26,7 +30,7 @@ class CpuTrap(BaseException):
    The isInterrupt bit in the mstatus register
    """

-    mtval: int
+    mtval: UInt32
    """
    contents of the mtval register
    """
@ -44,7 +48,7 @@ class CpuTrap(BaseException):
    def __init__(self, code: int, mtval, type: CpuTrapType, priv: PrivModes = PrivModes.MACHINE):
+        """
+        :param code: the cause code of the trap
+        :param mtval: the value for the mtval register, stored as UInt32
+        :param type: the trap type; interrupt is 0 for CpuTrapType.EXCEPTION and 1 for every other type
+        :param priv: the privilege mode stored with the trap, MACHINE by default
+        """
        self.interrupt = 0 if type == CpuTrapType.EXCEPTION else 1
        self.code = code
-        self.mtval = mtval
+        self.mtval = UInt32(mtval)
        self.priv = priv
        self.type = type

@ -52,14 +56,17 @@ class CpuTrap(BaseException):
    def mcause(self):
        return (self.interrupt << 31) + self.code

+    def message(self) -> str:
+        """
+        Additional text appended to the trap's repr; empty here, LoadAccessFault overrides it
+        """
+        return ""
+
    def __repr__(self):
+        """
+        Describe the trap by its name from MCAUSE_TRANSLATION (or as reserved interrupt if the
+        (interrupt, code) pair is unknown), followed by priv, type, mtval and the text from message()
+        """
        name = "Reserved interrupt({}, {})".format(self.interrupt, self.code)

        if (self.interrupt, self.code) in MCAUSE_TRANSLATION:
            name = MCAUSE_TRANSLATION[(self.interrupt, self.code)] + "({}, {})".format(self.interrupt, self.code)

+        # NOTE(review): mtval is a UInt32 now, the {:x} specifier relies on UInt32 supporting hex
+        # formatting - confirm
-        return "{} {{priv={}, type={}, mtval={:x}}}".format(
-            name, self.priv.name, self.type.name, self.mtval
+        return "{} {{priv={}, type={}, mtval={:x}}} {}".format(
+            name, self.priv.name, self.type.name, self.mtval, self.message()
        )

    def __str__(self):
@ -89,3 +96,29 @@ class TimerInterrupt(CpuTrap):
 class EcallTrap(CpuTrap):
+    """
+    A CpuTrap of type EXCEPTION with mtval 0, whose code is the value of the given privilege mode plus 8
+    """
    def __init__(self, mode: PrivModes):
        super().__init__(mode.value + 8, 0, CpuTrapType.EXCEPTION)
+
+
+class InvalidElfException(RiscemuBaseException):
+    """
+    Raised for elf files that cannot be loaded; the reason is kept in msg
+    """
+    def __init__(self, msg: str):
+        super().__init__()
+        self.msg = msg
+
+    def message(self):
+        """
+        The class name and the reason, wrapped in the parser color codes
+        """
+        text = "{}(\"{}\")".format(type(self).__name__, self.msg)
+        return FMT_PARSE + text + FMT_NONE
+
+
+class LoadAccessFault(CpuTrap):
+    """
+    A CpuTrap of type EXCEPTION with code 5 whose mtval is the accessed address; it also records the
+    operation, the access size and a description of the problem
+    """
+    def __init__(self, msg, addr, size, op):
+        super().__init__(5, addr, CpuTrapType.EXCEPTION)
+        self.msg, self.addr, self.size, self.op = msg, addr, size, op
+
+    def message(self):
+        """
+        Describe the failed access: operation, address, size and the problem description
+        """
+        template = "(During {} at 0x{:08x} of size {}: {})"
+        return template.format(self.op, self.addr, self.size, self.msg)
--- a/riscemu/priv/ImageLoader.py
+++ b/riscemu/priv/ImageLoader.py
@ -2,124 +2,74 @@
 Loads a memory image with debug information into memory
 """

-import json
-from functools import lru_cache
-from typing import Dict, List, Optional, TYPE_CHECKING
-
-from .ElfLoader import ElfInstruction, ElfLoadedMemorySection, InstructionAccessFault, InstructionAddressMisalignedTrap
-from .PrivMMU import PrivMMU
-from ..Config import RunConfig
-from ..Executable import LoadedMemorySection, MemoryFlags
-from ..IO.IOModule import IOModule
-from ..colors import FMT_ERROR, FMT_NONE, FMT_MEM
-from ..decoder import decode
-
-if TYPE_CHECKING:
-    pass
-
-
-class MemoryImageMMU(PrivMMU):
-    io: List[IOModule]
-    data: bytearray
-    io_start: int
-    debug_info: Dict[str, Dict[str, Dict[str, str]]]
-
-    def __init__(self, file_name: str, io_start: int = 0xFF0000):
-        super(MemoryImageMMU, self).__init__(conf=RunConfig())
-
-        with open(file_name, 'rb') as memf:
-            data = memf.read()
-        with open(file_name + '.dbg', 'r') as dbgf:
-            debug_info: Dict = json.load(dbgf)
-
-        self.data = bytearray(data)
-        # TODO: super wasteful memory allocation happening here
-        if len(data) < io_start:
-            self.data += bytearray(io_start - len(data))
-        self.debug_info = debug_info
-        self.io_start = io_start
-        self.io = list()
-
-    def get_entrypoint(self):
-        try:
-            start = self.debug_info['symbols']['kernel'].get('_start', None)
-            if start is not None:
-                return start
-            return self.debug_info['symbols']['kernel'].get('_ftext')
-        except KeyError:
-            print(FMT_ERROR + '[MMU] cannot find kernel entry in debug information! Falling back to 0x100' + FMT_NONE)
-            return 0x100
-
-    @lru_cache(maxsize=2048)
-    def read_ins(self, addr: int) -> ElfInstruction:
-        if addr >= self.io_start:
-            raise InstructionAccessFault(addr)
-        if addr % 4 != 0:
-            raise InstructionAddressMisalignedTrap(addr)
-
-        return ElfInstruction(*decode(self.data[addr:addr + 4]))
-
-    def read(self, addr: int, size: int) -> bytearray:
-        if addr < 0x100:
-            pc = self.cpu.pc
-            text_sec = self.get_sec_containing(pc)
-            print(FMT_ERROR + "[MMU] possible null dereference (read {:x}) from (pc={:x},sec={},rel={:x})".format(
-                addr, pc, text_sec.owner + ':' + text_sec.name, pc - text_sec.base
-            ) + FMT_NONE)
-        if addr >= self.io_start:
-            return self.io_at(addr).read(addr, size)
-        return self.data[addr: addr + size]
-
-    def write(self, addr: int, size: int, data):
-        if addr < 0x100:
-            pc = self.cpu.pc
-            text_sec = self.get_sec_containing(pc)
-            print(FMT_ERROR + "[MMU] possible null dereference (write {:x}) from (pc={:x},sec={},rel={:x})".format(
-                addr, pc, text_sec.owner + ':' + text_sec.name, pc - text_sec.base
-            ) + FMT_NONE)
-
-        if addr >= self.io_start:
-            return self.io_at(addr).write(addr, data, size)
-        self.data[addr:addr + size] = data[0:size]
-
-    def io_at(self, addr) -> IOModule:
-        for mod in self.io:
-            if mod.contains(addr):
-                return mod
-        raise InstructionAccessFault(addr)
-
-    def add_io(self, io: IOModule):
-        self.io.append(io)
-
-    def __repr__(self):
-        return "MemoryImageMMU()"
-
-    @lru_cache(maxsize=32)
-    def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]:
-        next_sec = len(self.data)
-        for sec_addr, name in reversed(self.debug_info['sections'].items()):
-            if addr >= int(sec_addr):
-                owner, name = name.split(':')
-                base = int(sec_addr)
-                size = next_sec - base
-                flags = MemoryFlags('.text' in name, '.text' in name)
-                return ElfLoadedMemorySection(name, base, size, self.data[base:next_sec], flags, owner)
-            else:
-                next_sec = int(sec_addr)
-
-    def translate_address(self, addr: int):
-        sec = self.get_sec_containing(addr)
-        if sec.name == '.empty':
-            return "<empty>"
-        symbs = self.debug_info['symbols'][sec.owner]
-        for sym, val in reversed(symbs.items()):
-            if addr >= val:
-                return "{}{:+x} ({}:{})".format(sym, addr - val, sec.owner, sec.name)
-        return "{}:{}{:+x}".format(sec.owner, sec.name, addr - sec.base)
-
-    def symbol(self, symb: str):
-        print(FMT_MEM + "Looking up symbol {}".format(symb))
-        for owner, symbs in self.debug_info['symbols'].items():
-            if symb in symbs:
-                print("  Hit in {}: {} = {}".format(owner, symb, symbs[symb]))
-        print(FMT_NONE, end="")
+import os.path
+from typing import Iterable, List, Tuple
+
+from .ElfLoader import ElfMemorySection
+from .types import MemoryImageDebugInfos
+from ..assembler import INSTRUCTION_SECTION_NAMES
+from ..colors import FMT_NONE, FMT_PARSE
+from ..helpers import get_section_base_name
+from ..types import MemoryFlags, ProgramLoader, Program, T_ParserOpts
+
+
+class MemoryImageLoader(ProgramLoader):
+    """
+    Program loader for raw memory images (``*.img`` files).
+
+    When the ``debug`` option names a debug information file (:meth:`instantiate` sets it if a
+    ``<image>.dbg`` file exists next to the image), the image is split into one program per entry of
+    the debug information. Without it, the whole image is loaded as a single ``.text`` section at
+    address 0.
+    """
+
+    @classmethod
+    def can_parse(cls, source_path: str) -> float:
+        """
+        Return 1 if ``source_path`` ends in ``.img``, 0 otherwise.
+
+        The extension is matched including the dot, so a file that is merely named ``img`` is not
+        claimed by this loader.
+        """
+        return 1.0 if source_path.endswith('.img') else 0.0
+
+    @classmethod
+    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
+        """
+        This loader takes no command line options: return ``argv`` untouched and an empty option dict.
+        """
+        # typing.List is used on purpose: the builtin list[str] can only be subscripted on python >= 3.9
+        return argv, {}
+
+    def parse(self) -> Iterable[Program]:
+        """
+        Yield the programs contained in the memory image.
+
+        Without a ``debug`` option a single program covering the whole image is yielded (see
+        :meth:`parse_no_debug`). Otherwise one program is yielded per entry in the debug information's
+        ``sections``, with its labels and global labels taken from ``symbols`` and ``globals``.
+        """
+        if 'debug' not in self.options:
+            yield self.parse_no_debug()
+            return
+
+        with open(self.options['debug'], 'r') as debug_file:
+            debug_info = MemoryImageDebugInfos.load(debug_file.read())
+
+        with open(self.source_path, 'rb') as source_file:
+            data: bytearray = bytearray(source_file.read())
+
+        for name, sections in debug_info.sections.items():
+            program = Program(name)
+
+            for sec_name, (start, size) in sections.items():
+                # the first section listed for a program defines the program's base address
+                if program.base is None:
+                    program.base = start
+
+                # every section gets MemoryFlags(False, True), regardless of its name
+                # NOTE(review): presumably (read_only=False, executable=True) - confirm against MemoryFlags
+                program.add_section(
+                    ElfMemorySection(
+                        data[start:start + size], sec_name, program.context,
+                        name, start, MemoryFlags(False, True)
+                    )
+                )
+
+            program.context.labels.update(debug_info.symbols.get(name, dict()))
+            program.global_labels.update(debug_info.globals.get(name, set()))
+
+            yield program
+
+    def parse_no_debug(self) -> Program:
+        """
+        Load the whole image as one program with a single ``.text`` section based at address 0.
+
+        Prints a warning, since no section or symbol information is available in this mode.
+        """
+        print(FMT_PARSE + "[MemoryImageLoader] Warning: loading memory image without debug information!" + FMT_NONE)
+
+        with open(self.source_path, 'rb') as source_file:
+            data: bytes = source_file.read()
+
+        p = Program(self.filename)
+        p.add_section(ElfMemorySection(
+            bytearray(data), '.text', p.context, p.name, 0, MemoryFlags(False, True)
+        ))
+        return p
+
+    @classmethod
+    def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader':
+        """
+        Create a loader for ``source_path``.
+
+        If ``<source_path>.dbg`` exists it is passed on as the ``debug`` option. An explicitly given
+        ``debug`` option is left alone (merging it via keyword arguments used to raise a TypeError).
+        """
+        debug_path = source_path + '.dbg'
+        if 'debug' not in options and os.path.isfile(debug_path):
+            # copy instead of mutating the dict handed in by the caller
+            options = dict(options, debug=debug_path)
+        # cls instead of the hard coded class name, so subclasses instantiate themselves
+        return cls(source_path, options)
--- a/riscemu/priv/PrivCPU.py
+++ b/riscemu/priv/PrivCPU.py
@ -3,20 +3,23 @@ RiscEmu (c) 2021 Anton Lydike

 SPDX-License-Identifier: MIT
 """
+import sys
 import time

 from riscemu.CPU import *
 from .CSR import CSR
+from .ElfLoader import ElfBinaryFileLoader
 from .Exceptions import *
+from .ImageLoader import MemoryImageLoader
 from .PrivMMU import PrivMMU
 from .PrivRV32I import PrivRV32I
 from .privmodes import PrivModes
-from ..IO import TextIO
+from ..IO.TextIO import TextIO
 from ..instructions import RV32A, RV32M
+from ..types import Program, UInt32

 if typing.TYPE_CHECKING:
-    from riscemu import Executable, LoadedExecutable, LoadedInstruction
-    from riscemu.instructions.InstructionSet import InstructionSet
+    from riscemu.instructions.instruction_set import InstructionSet


 class PrivCPU(CPU):
@ -25,7 +28,7 @@ class PrivCPU(CPU):

    It should support M and U Mode, but no U-Mode Traps.

-    This allows us to
+    This is meant to emulate whole operating systems.
    """

    csr: CSR
@ -38,105 +41,98 @@ class PrivCPU(CPU):
    controls the resolution of the time csr register (in nanoseconds)
    """

-    INS_XLEN = 4
+    pending_traps: List[CpuTrap]
    """
-    Size of an instruction in memory. Should be 4, but since our loading code is shit, instruction take up 
-    the equivalent of "1 byte" (this is actually impossible)
+    A list of traps which are pending to be handled
    """

-    def __init__(self, conf, mmu: PrivMMU):
-        super().__init__(conf, [PrivRV32I, RV32M, RV32A])
+    def __init__(self, conf):
+        super().__init__(PrivMMU(), [PrivRV32I, RV32M, RV32A], conf)
+        # start in machine mode
        self.mode: PrivModes = PrivModes.MACHINE

-        mmu.set_cpu(self)
-        self.pc = mmu.get_entrypoint()
-        self.mmu = mmu
-
-        if hasattr(self.mmu, 'add_io'):
-            self.mmu.add_io(TextIO.TextIO(0xff0000, 64))
-
-        self.syscall_int = None
-        self.launch_debug = False
        self.pending_traps: List[CpuTrap] = list()

+        self.exit_code = 0
+
        self._time_start = 0
-        self._time_timecmp = 0
+        self._time_timecmp = UInt32(0)
        self._time_interrupt_enabled = False

        # performance counters
        self._perf_counters = list()

+        # add TextIO
+        io = TextIO(0xFF0000, 64)
+        self.mmu.load_section(io, True)
+
        # init csr
        self._init_csr()

-    def _run(self, verbose=False):
+        self.TIME_RESOLUTION_NS = int(self.TIME_RESOLUTION_NS * conf.slowdown)
+
+    def run(self, verbose=False):
        if self.pc <= 0:
            return False
-        ins = None
+
+        launch_debug = False
+
        try:
-            while not self.exit:
+            while not self.halted:
                self.step(verbose)
        except RiscemuBaseException as ex:
            if isinstance(ex, LaunchDebuggerException):
-                self.launch_debug = True
+                launch_debug = True
                self.pc += self.INS_XLEN

-        if self.exit:
+        if self.halted:
            print()
-            print(FMT_CPU + "Program exited with code {}".format(self.exit_code) + FMT_NONE)
+            print(FMT_CPU + "[CPU] System halted with code {}".format(self.exit_code) + FMT_NONE)
            sys.exit(self.exit_code)
-        elif self.launch_debug:
-            self.launch_debug = False
-            launch_debug_session(self, self.mmu, self.regs,
-                                 "Launching debugger:")
-            if not self.active_debug:
-                self._run(verbose)
+
+        elif launch_debug:
+            launch_debug_session(self)
+            if not self.debugger_active:
+                self.run(verbose)
        else:
            print()
-            print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE)
+            print(FMT_CPU + "[CPU] System stopped without halting - perhaps you stopped the debugger?" + FMT_NONE)

-    def load(self, e: riscemu.Executable):
-        raise NotImplementedError("Not supported!")
-
-    def run_loaded(self, le: 'riscemu.LoadedExecutable'):
-        raise NotImplementedError("Not supported!")
-
-    def get_tokenizer(self, tokenizer_input):
-        raise NotImplementedError("Not supported!")
-
-    def run(self, verbose: bool = False):
+    def launch(self, program: Optional[Program] = None, verbose: bool = False):
        print(FMT_CPU + '[CPU] Started running from 0x{:08X} ({})'.format(self.pc, "kernel") + FMT_NONE)
        self._time_start = time.perf_counter_ns() // self.TIME_RESOLUTION_NS
-        self._run(self.conf.verbosity > 1)
+
+        self.run(self.conf.verbosity > 1 or verbose)
+
+    def load_program(self, program: Program):
+        if program.name == 'kernel':
+            self.pc = program.entrypoint
+        super().load_program(program)

    def _init_csr(self):
        # set up CSR
        self.csr = CSR()
-        self.csr.set('mhartid', 0)  # core id
+        self.csr.set('mhartid', UInt32(0))  # core id
        # TODO: set correct value
-        self.csr.set('mimpid', 0)  # implementation id
+        self.csr.set('mimpid', UInt32(0))  # implementation id
        # set mxl to 1 (32 bit) and set bits for i and m isa
-        self.csr.set('misa', (1 << 30) + (1 << 8) + (1 << 12))  # available ISA
+        self.csr.set('misa', UInt32((1 << 30) + (1 << 8) + (1 << 12)))  # available ISA

        # CSR write callbacks:

        @self.csr.callback('halt')
-        def halt(old: int, new: int):
+        def halt(old: UInt32, new: UInt32):
            if new != 0:
-                self.exit = True
-                self.exit_code = new
-
-        @self.csr.callback('mstatus')
-        def mstatus(old: int, new: int):
-            pass
+                self.halted = True
+                self.exit_code = new.value

        @self.csr.callback('mtimecmp')
-        def mtimecmp(old, new):
+        def mtimecmp(old: UInt32, new: UInt32):
            self._time_timecmp = (self.csr.get('mtimecmph') << 32) + new
            self._time_interrupt_enabled = True

        @self.csr.callback('mtimecmph')
-        def mtimecmph(old, new):
+        def mtimecmph(old: UInt32, new: UInt32):
            self._time_timecmp = (new << 32) + self.csr.get('mtimecmp')
            self._time_interrupt_enabled = True

@ -144,11 +140,11 @@ class PrivCPU(CPU):

        @self.csr.virtual_register('time')
        def get_time():
-            return (time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start) & (2 ** 32 - 1)
+            return UInt32(time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start)

        @self.csr.virtual_register('timeh')
        def get_timeh():
-            return (time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start) >> 32
+            return UInt32((time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start) >> 32)

        # add minstret and mcycle counters

@ -163,7 +159,7 @@ class PrivCPU(CPU):
                self._timer_step()
            self._check_interrupt()
            ins = self.mmu.read_ins(self.pc)
-            if verbose and self.mode == PrivModes.USER:
+            if verbose and (self.mode == PrivModes.USER or self.conf.verbosity > 4):
                print(FMT_CPU + "   Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins))
            self.run_instruction(ins)
            self.pc += self.INS_XLEN
@ -175,6 +171,7 @@ class PrivCPU(CPU):
                    self.mmu.translate_address(self.pc),
                    self.pc
                ) + FMT_NONE)
+                breakpoint()
                if self.conf.debug_on_exception:
                    raise LaunchDebuggerException()
            self.pc += self.INS_XLEN
@ -190,10 +187,11 @@ class PrivCPU(CPU):
        if not (len(self.pending_traps) > 0 and self.csr.get_mstatus('mie')):
            return
        # select best interrupt
-        # TODO: actually select based on the official ranking
+        # FIXME: actually select based on the official ranking
        trap = self.pending_traps.pop()  # use the most recent trap
        if self.conf.verbosity > 0:
            print(FMT_CPU + "[CPU] taking trap {}!".format(trap) + FMT_NONE)
+            self.regs.dump_reg_a()

        if trap.priv != PrivModes.MACHINE:
            print(FMT_CPU + "[CPU] Trap not targeting machine mode encountered! - undefined behaviour!" + FMT_NONE)
@ -204,18 +202,18 @@ class PrivCPU(CPU):

        self.csr.set_mstatus('mpie', self.csr.get_mstatus('mie'))
        self.csr.set_mstatus('mpp', self.mode.value)
-        self.csr.set_mstatus('mie', 0)
+        self.csr.set_mstatus('mie', UInt32(0))
        self.csr.set('mcause', trap.mcause)
        self.csr.set('mepc', self.pc - self.INS_XLEN)
        self.csr.set('mtval', trap.mtval)
        self.mode = trap.priv
        mtvec = self.csr.get('mtvec')
        if mtvec & 0b11 == 0:
-            self.pc = mtvec
+            self.pc = mtvec.value
        if mtvec & 0b11 == 1:
-            self.pc = (mtvec & 0b11111111111111111111111111111100) + (trap.code * 4)
+            self.pc = ((mtvec & 0b11111111111111111111111111111100) + (trap.code * 4)).value
        self.record_perf_profile()
-        if len(self._perf_counters) % 100 == 0:
+        if len(self._perf_counters) > 100:
            self.show_perf()

    def show_perf(self):
@ -231,11 +229,6 @@ class PrivCPU(CPU):
                continue
            cps = (cycle - cycled) / (time_ns - timed) * 1000000000

-            # print("    {:03d} cycles in {:08d}ns ({:.2f} cycles/s)".format(
-            #    cycle - cycled,
-            #    time_ns - timed,
-            #    cps
-            # ))
            cycled = cycle
            timed = time_ns
            cps_list.append(cps)
@ -244,3 +237,9 @@ class PrivCPU(CPU):

    def record_perf_profile(self):
        self._perf_counters.append((time.perf_counter_ns(), self.cycle))
+
+    @classmethod
+    def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
+        return [
+            AssemblyFileLoader, MemoryImageLoader, ElfBinaryFileLoader
+        ]
--- a/riscemu/priv/PrivMMU.py
+++ b/riscemu/priv/PrivMMU.py
@ -1,42 +1,43 @@
+from .types import ElfMemorySection
 from ..MMU import *
 from abc import abstractmethod

 import typing

-from .ElfLoader import ElfExecutable
-
 if typing.TYPE_CHECKING:
    from .PrivCPU import PrivCPU


 class PrivMMU(MMU):
-    cpu: 'PrivCPU'
-
-    @abstractmethod
-    def get_entrypoint(self) -> int:
-        raise

-    def set_cpu(self, cpu: 'PrivCPU'):
-        self.cpu = cpu
+    def get_sec_containing(self, addr: T_AbsoluteAddress) -> MemorySection:
+        # try to get an existing section
+        existing_sec = super().get_sec_containing(addr)

-    def translate_address(self, addr: int):
-        return ""
+        if existing_sec is not None:
+            return existing_sec

+        # get section preceding empty space at addr
+        sec_before = next((sec for sec in reversed(self.sections) if sec.end < addr), None)
+        # get sec succeeding empty space at addr
+        sec_after = next((sec for sec in self.sections if sec.base > addr), None)

-class LoadedElfMMU(PrivMMU):
-    def __init__(self, elf: ElfExecutable):
-        super().__init__(conf=RunConfig())
-        self.entrypoint = elf.symbols['_start']
+        # calc start and end of "free" space
+        prev_sec_end = 0 if sec_before is None else sec_before.end
+        next_sec_start = 0x7FFFFFFF if sec_after is None else sec_after.base

-        self.binaries.append(elf)
-        for sec in elf.sections:
-            self.sections.append(sec)
+        # start at the end of the prev section, or current address - 0xFFFF (aligned to 16 byte boundary)
+        start = max(prev_sec_end, align_addr(addr - 0xFFFF, 16))
+        # end at the start of the next section, or address + 0xFFFF (aligned to 16 byte boundary)
+        end = min(next_sec_start, align_addr(addr + 0xFFFF, 16))

-    def load_bin(self, exe: Executable) -> LoadedExecutable:
-        raise NotImplementedError("This is a privMMU, it's initialized with a single ElfExecutable!")
+        sec = ElfMemorySection(bytearray(end - start), '.empty', self.global_instruction_context(), '', start, MemoryFlags(False, True))
+        self.sections.append(sec)
+        self._update_state()

-    def allocate_section(self, name: str, req_size: int, flag: MemoryFlags):
-        raise NotImplementedError("Not supported!")
+        return sec

-    def get_entrypoint(self):
-        return self.entrypoint
+    def global_instruction_context(self) -> InstructionContext:
+        context = InstructionContext()
+        context.global_symbol_dict = self.global_symbols
+        return context
--- a/riscemu/priv/PrivRV32I.py
+++ b/riscemu/priv/PrivRV32I.py
@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT
 """

 from ..instructions.RV32I import *
-from ..Exceptions import INS_NOT_IMPLEMENTED
+from riscemu.types.exceptions import INS_NOT_IMPLEMENTED
 from .Exceptions import *
 from .privmodes import PrivModes
 from ..colors import FMT_CPU, FMT_NONE
@ -21,7 +21,7 @@ class PrivRV32I(RV32I):
    This is an extension of RV32I, written for the PrivCPU class
    """

-    def instruction_csrrw(self, ins: 'LoadedInstruction'):
+    def instruction_csrrw(self, ins: 'Instruction'):
        rd, rs, csr_addr = self.parse_crs_ins(ins)
        old_val = None
        if rd != 'zero':
@ -34,7 +34,7 @@ class PrivRV32I(RV32I):
        if old_val is not None:
            self.regs.set(rd, old_val)

-    def instruction_csrrs(self, ins: 'LoadedInstruction'):
+    def instruction_csrrs(self, ins: 'Instruction'):
        rd, rs, csr_addr = self.parse_crs_ins(ins)
        if rs != 'zero':
            # oh no, this should not happen!
@ -44,14 +44,13 @@ class PrivRV32I(RV32I):
            old_val = self.cpu.csr.get(csr_addr)
            self.regs.set(rd, old_val)

-
-    def instruction_csrrc(self, ins: 'LoadedInstruction'):
+    def instruction_csrrc(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_csrrsi(self, ins: 'LoadedInstruction'):
+    def instruction_csrrsi(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_csrrwi(self, ins: 'LoadedInstruction'):
+    def instruction_csrrwi(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        rd, imm, addr = ins.get_reg(0), ins.get_imm(1), ins.get_imm(2)
        if rd != 'zero':
@ -61,11 +60,10 @@ class PrivRV32I(RV32I):
        self.cpu.csr.assert_can_write(self.cpu.mode, addr)
        self.cpu.csr.set(addr, imm)

-
-    def instruction_csrrci(self, ins: 'LoadedInstruction'):
+    def instruction_csrrci(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_mret(self, ins: 'LoadedInstruction'):
+    def instruction_mret(self, ins: 'Instruction'):
        if self.cpu.mode != PrivModes.MACHINE:
            print("MRET not inside machine level code!")
            raise IllegalInstructionTrap(ins)
@ -77,10 +75,10 @@ class PrivRV32I(RV32I):
        self.cpu.mode = PrivModes(mpp)
        # restore pc
        mepc = self.cpu.csr.get('mepc')
-        self.cpu.pc = mepc - self.cpu.INS_XLEN
+        self.cpu.pc = (mepc - self.cpu.INS_XLEN).value

        if self.cpu.conf.verbosity > 0:
-            sec = self.mmu.get_sec_containing(mepc)
+            sec = self.mmu.get_sec_containing(mepc.value)
            if sec is not None:
                print(FMT_CPU + "[CPU] returning to mode {} in {} (0x{:x})".format(
                    PrivModes(mpp).name,
@ -90,78 +88,83 @@ class PrivRV32I(RV32I):
                if self.cpu.conf.verbosity > 1:
                    self.regs.dump_reg_a()

-    def instruction_uret(self, ins: 'LoadedInstruction'):
+    def instruction_uret(self, ins: 'Instruction'):
        raise IllegalInstructionTrap(ins)

-    def instruction_sret(self, ins: 'LoadedInstruction'):
+    def instruction_sret(self, ins: 'Instruction'):
        raise IllegalInstructionTrap(ins)

-    def instruction_scall(self, ins: 'LoadedInstruction'):
+    def instruction_scall(self, ins: 'Instruction'):
        """
        Overwrite the scall from userspace RV32I
        """
        raise EcallTrap(self.cpu.mode)

-    def instruction_beq(self, ins: 'LoadedInstruction'):
+    def instruction_beq(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 == rs2:
-            self.pc += dst - 4
+            self.pc += dst.value - 4

-    def instruction_bne(self, ins: 'LoadedInstruction'):
+    def instruction_bne(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 != rs2:
-            self.pc += dst - 4
+            self.pc += dst.value - 4

-    def instruction_blt(self, ins: 'LoadedInstruction'):
+    def instruction_blt(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 < rs2:
-            self.pc += dst - 4
+            self.pc += dst.value - 4

-    def instruction_bge(self, ins: 'LoadedInstruction'):
+    def instruction_bge(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 >= rs2:
-            self.pc += dst - 4
+            self.pc += dst.value - 4

-    def instruction_bltu(self, ins: 'LoadedInstruction'):
+    def instruction_bltu(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
        if rs1 < rs2:
-            self.pc += dst - 4
+            self.pc += dst.value - 4

-    def instruction_bgeu(self, ins: 'LoadedInstruction'):
+    def instruction_bgeu(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
        if rs1 >= rs2:
-            self.pc += dst - 4
+            self.pc += dst.value - 4

    # technically deprecated
-    def instruction_j(self, ins: 'LoadedInstruction'):
+    def instruction_j(self, ins: 'Instruction'):
        raise NotImplementedError("Should never be reached!")

-    def instruction_jal(self, ins: 'LoadedInstruction'):
+    def instruction_jal(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
        addr = ins.get_imm(1)
-        if reg == 'ra' and self.cpu.mode == PrivModes.USER and self.cpu.conf.verbosity > 1:
-            print(FMT_CPU + 'Jumping to {} (0x{:x})'.format(
+        if reg == 'ra' and (
+                (self.cpu.mode == PrivModes.USER and self.cpu.conf.verbosity > 1) or
+                (self.cpu.conf.verbosity > 3)
+        ):
+            print(FMT_CPU + 'Jumping from 0x{:x} to {} (0x{:x})'.format(
+                self.pc,
                self.mmu.translate_address(self.pc + addr),
                self.pc + addr
            ) + FMT_NONE)
-        self.regs.set(reg, self.pc)
+            self.regs.dump_reg_a()
+        self.regs.set(reg, Int32(self.pc))
        self.pc += addr - 4

-    def instruction_jalr(self, ins: 'LoadedInstruction'):
+    def instruction_jalr(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        rd, rs, imm = self.parse_rd_rs_imm(ins)
-        self.regs.set(rd, self.pc)
-        self.pc = rs + imm - 4
+        self.regs.set(rd, Int32(self.pc))
+        self.pc = rs.value + imm.value - 4

-    def instruction_sbreak(self, ins: 'LoadedInstruction'):
+    def instruction_sbreak(self, ins: 'Instruction'):
        raise LaunchDebuggerException()

-    def parse_crs_ins(self, ins: 'LoadedInstruction'):
+    def parse_crs_ins(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        return ins.get_reg(0), ins.get_reg(1), ins.get_imm(2)

-    def parse_mem_ins(self, ins: 'LoadedInstruction') -> Tuple[str, int]:
+    def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, int]:
        ASSERT_LEN(ins.args, 3)
        addr = self.get_reg_content(ins, 1) + ins.get_imm(2)
        reg = ins.get_reg(0)
--- a/riscemu/priv/main.py
+++ b/riscemu/priv/main.py
@ -1,7 +1,6 @@
-from .PrivCPU import PrivCPU, RunConfig
-from .ImageLoader import MemoryImageMMU
-from .PrivMMU import LoadedElfMMU
-from .ElfLoader import ElfExecutable
+from riscemu import RunConfig
+from riscemu.types import Program
+from .PrivCPU import PrivCPU

 import sys

@ -10,26 +9,29 @@ if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='RISC-V privileged architecture emulator', prog='riscemu')

-    parser.add_argument('--kernel', type=str, help='Kernel elf loaded with user programs', nargs='?')
-    parser.add_argument('--image', type=str, help='Memory image containing kernel', nargs='?')
-    parser.add_argument('--debug-exceptions', help='Launch the interactive debugger when an exception is generated', action='store_true')
+    parser.add_argument('source', type=str,
+                        help='Compiled RISC-V ELF file or memory image containing compiled RISC-V ELF files', nargs='+')
+    parser.add_argument('--debug-exceptions', help='Launch the interactive debugger when an exception is generated',
+                        action='store_true')

-    parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count', default=0)
+    parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count',
+                        default=0)

-    args = parser.parse_args()
-    mmu = None
-
-    if args.kernel is not None:
-        mmu = LoadedElfMMU(ElfExecutable(args.kernel))
-    elif args.image is not None:
-        mmu = MemoryImageMMU(args.image)
+    parser.add_argument('--slowdown', help="Slow down the emulated CPU clock by a factor", type=float, default=1)

-    if mmu is None:
-        print("You must specify one of --kernel or --image for running in privilege mode!")
-        sys.exit(1)
-
-    cpu = PrivCPU(RunConfig(verbosity=args.verbose, debug_on_exception=args.debug_exceptions), mmu)
-    cpu.run()
+    args = parser.parse_args()

+    cpu = PrivCPU(RunConfig(verbosity=args.verbose, debug_on_exception=args.debug_exceptions, slowdown=args.slowdown))

+    for source_path in args.source:
+        loader = max((loader for loader in cpu.get_loaders()), key=lambda l: l.can_parse(source_path))
+        argv, opts = loader.get_options(sys.argv)
+        program = loader.instantiate(source_path, opts).parse()
+        if isinstance(program, Program):
+            cpu.load_program(program)
+        else:
+            program_iter = program
+            for program in program_iter:
+                cpu.load_program(program)

+    cpu.launch(verbose=args.verbose > 4)
--- a/riscemu/priv/types.py
+++ b/riscemu/priv/types.py
@ -0,0 +1,147 @@
+import json
+from collections import defaultdict
+from dataclasses import dataclass
+from functools import lru_cache
+from typing import Tuple, Dict, Set
+
+from riscemu.colors import FMT_NONE, FMT_PARSE
+from riscemu.decoder import format_ins, RISCV_REGS, decode
+from riscemu.priv.Exceptions import InstructionAccessFault, InstructionAddressMisalignedTrap, LoadAccessFault
+from riscemu.types import Instruction, InstructionContext, T_RelativeAddress, MemoryFlags, T_AbsoluteAddress, \
+    BinaryDataMemorySection
+
+
+@dataclass(frozen=True)
+class ElfInstruction(Instruction):
+    """
+    An instruction decoded from binary data.
+
+    Stores the instruction name, its decoded arguments and the encoded instruction word.
+    """
+    name: str
+    args: Tuple[int]
+    encoded: int
+
+    def get_imm(self, num: int) -> int:
+        """
+        Return argument number num unchanged.
+        """
+        return self.args[num]
+
+    def get_imm_reg(self, num: int) -> Tuple[int, int]:
+        """
+        Return the last and the second to last argument, in that order.
+
+        :param num: Not used by this implementation
+        """
+        last = self.args[-1]
+        second_to_last = self.args[-2]
+        return last, second_to_last
+
+    def get_reg(self, num: int) -> str:
+        """
+        Return the name of the register whose index is stored in argument number num.
+        """
+        register_index = self.args[num]
+        return RISCV_REGS[register_index]
+
+    def __repr__(self) -> str:
+        """
+        Render the instruction, preferring the short forms j, mv, li and ret where they apply.
+        """
+        mnemonic = self.name
+        operands = self.args
+        if mnemonic == 'jal':
+            if operands[0] == 0:
+                return "j       {}".format(operands[1])
+        elif mnemonic == 'addi':
+            # the mv form is checked before the li form
+            if operands[2] == 0:
+                return "mv      {}, {}".format(self.get_reg(0), self.get_reg(1))
+            if operands[1] == 0:
+                return "li      {}, {}".format(self.get_reg(0), operands[2])
+        elif mnemonic == 'ret':
+            if len(operands) == 0:
+                return "ret"
+        # everything else is formatted from the encoded instruction word
+        return format_ins(self.encoded, mnemonic)
+
+
+class ElfMemorySection(BinaryDataMemorySection):
+    """
+    A binary memory section that decodes instructions from its data on demand.
+
+    Decoded instructions are memoized per instance; every permitted write clears that cache.
+    """
+
+    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int,
+                 flags: MemoryFlags):
+        super().__init__(data, name, context, owner, base=base, flags=flags)
+        # one cache slot per four bytes of section data
+        memoize = lru_cache(maxsize=self.size // 4)
+        self.read_ins = memoize(self.read_ins)
+
+    def read_ins(self, offset):
+        """
+        Decode the four bytes starting at offset into an ElfInstruction.
+
+        :param offset: Position relative to the start of this section
+        :raises InstructionAccessFault: If the section is not flagged executable
+        :raises InstructionAddressMisalignedTrap: If offset is not a multiple of four
+        """
+        if not self.flags.executable:
+            print(FMT_PARSE + "Reading instruction from non-executable memory!" + FMT_NONE)
+            raise InstructionAccessFault(offset + self.base)
+        if offset % 4:
+            raise InstructionAddressMisalignedTrap(offset + self.base)
+        raw_word = self.data[offset:offset + 4]
+        return ElfInstruction(*decode(raw_word))
+
+    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
+        """
+        Write data into the section and drop all memoized instructions.
+
+        :raises LoadAccessFault: If the section is flagged read-only
+        """
+        if self.flags.read_only:
+            raise LoadAccessFault('read-only section', offset + self.base, size, 'write')
+        # previously decoded instructions may no longer match the underlying bytes
+        self.read_ins.cache_clear()
+        return super().write(offset, size, data)
+
+    @property
+    def end(self):
+        """
+        The sum of the section size and its base.
+        """
+        return self.size + self.base
+
+
+class MemoryImageDebugInfos:
+    """
+    Debug information accompanying a memory image: sections, symbols and global symbols per program.
+
+    Instances can be converted to a JSON string with serialize() and restored with load().
+    """
+
+    VERSION = '1.0.0'
+    """
+    Schema version
+    """
+
+    base: T_AbsoluteAddress = 0
+    """
+    The base address where the image starts. Defaults to zero.
+    """
+
+    sections: Dict[str, Dict[str, Tuple[int, int]]]
+    """
+    This dictionary maps a program and section to (start address, section length)
+    """
+
+    symbols: Dict[str, Dict[str, int]]
+    """
+    This dictionary maps a program and a symbol to a value
+    """
+
+    globals: Dict[str, Set[str]]
+    """
+    This dictionary contains the list of all global symbols of a given program
+    """
+
+    def __init__(self,
+                 sections: Dict[str, Dict[str, Tuple[int, int]]],
+                 symbols: Dict[str, Dict[str, int]],
+                 globals: Dict[str, Set[str]],
+                 base: int = 0
+                 ):
+        """
+        :param sections: Maps program name and section name to (start address, section length)
+        :param symbols: Maps program name and symbol name to a value
+        :param globals: Maps program name to its global symbols; every value is converted to a set in place
+        :param base: The base address where the image starts
+        """
+        self.sections = sections
+        self.symbols = symbols
+        self.globals = globals
+        # values arrive as lists when restored from JSON, normalize them to sets
+        for name in globals:
+            globals[name] = set(globals[name])
+        self.base = base
+
+    def serialize(self) -> str:
+        """
+        Convert the debug information into a JSON string.
+
+        Sets and tuples are written as JSON arrays, dictionaries as JSON objects.
+
+        :return: The JSON document, including the schema VERSION
+        """
+        def to_json_compatible(obj: any):
+            # The json `default` hook has to return an encodable *object*. Returning an already
+            # encoded string would embed the collection as a JSON string, which load() then
+            # turns into a set of single characters.
+            if isinstance(obj, defaultdict):
+                return dict(obj)
+            if isinstance(obj, (set, tuple)):
+                return list(obj)
+            return "<<unserializable {}>>".format(getattr(obj, '__qualname__', '{unknown}'))
+
+        return json.dumps(
+            dict(
+                sections=self.sections,
+                symbols=self.symbols,
+                globals=self.globals,
+                base=self.base,
+                VERSION=self.VERSION
+            ),
+            default=to_json_compatible
+        )
+
+    @classmethod
+    def load(cls, serialized_str: str) -> 'MemoryImageDebugInfos':
+        """
+        Restore debug information from a string produced by serialize().
+
+        Section entries come back as lists, because JSON has no tuple type.
+
+        :param serialized_str: The JSON document
+        :return: The restored debug information
+        :raises RuntimeError: If the document has no VERSION or its major version differs from VERSION
+        """
+        json_obj: dict = json.loads(serialized_str)
+
+        if 'VERSION' not in json_obj:
+            raise RuntimeError("Unknown MemoryImageDebugInfo version!")
+
+        version: str = json_obj.pop('VERSION')
+
+        # only the major version has to match
+        if version.split('.')[0] != cls.VERSION.split('.')[0]:
+            raise RuntimeError(
+                "Unknown MemoryImageDebugInfo version! This emulator expects version {}, debug info version {}".format(
+                    cls.VERSION, version
+                )
+            )
+
+        # instantiate through cls so that subclasses get instances of their own type
+        return cls(**json_obj)
+
+    @classmethod
+    def builder(cls) -> 'MemoryImageDebugInfos':
+        """
+        Create an empty instance whose dictionaries create missing program entries on first access.
+        """
+        return cls(
+            defaultdict(dict), defaultdict(dict), defaultdict(set)
+        )
--- a/riscemu/registers.py
+++ b/riscemu/registers.py
@ -1,28 +1,27 @@
 """
-RiscEmu (c) 2021 Anton Lydike
+RiscEmu (c) 2021-2022 Anton Lydike

 SPDX-License-Identifier: MIT
 """

-from .Config import RunConfig
-from .helpers import *
 from collections import defaultdict
-from .Exceptions import InvalidRegisterException
+
+from .helpers import *
+
+if typing.TYPE_CHECKING:
+    from .types import Int32
+

 class Registers:
    """
    Represents a bunch of registers
    """

-    def __init__(self, conf: RunConfig):
-        """
-        Initialize the register configuration, respecting the RunConfig conf
-        :param conf: The RunConfig
-        """
-        self.vals = defaultdict(lambda: 0)
+    def __init__(self):
+        from .types import Int32
+        self.vals = defaultdict(lambda: Int32(0))
        self.last_set = None
        self.last_read = None
-        self.conf = conf

    def dump(self, full=False):
        """
@ -86,7 +85,7 @@ class Registers:
            return FMT_GRAY + txt + FMT_NONE
        return txt

-    def set(self, reg, val, mark_set=True) -> bool:
+    def set(self, reg, val: 'Int32', mark_set=True) -> bool:
        """
        Set a register content to val
        :param reg: The register to set
@ -94,9 +93,15 @@ class Registers:
        :param mark_set: If True, marks this register as "last accessed" (only used internally)
        :return: If the operation was successful
        """
+
+        from .types import Int32
+        # remove after refactoring is complete
+        if not isinstance(val, Int32):
+            raise RuntimeError("Setting register to non-Int32 value! Please refactor your code!")
+
        if reg == 'zero':
            return False
-        #if reg not in Registers.all_registers():
+        # if reg not in Registers.all_registers():
        #    raise InvalidRegisterException(reg)
        # replace fp register with s1, as these are the same register
        if reg == 'fp':
@ -104,17 +109,17 @@ class Registers:
        if mark_set:
            self.last_set = reg
        # check 32 bit signed bounds
-        self.vals[reg] = bind_twos_complement(val)
+        self.vals[reg] = val.unsigned()
        return True

-    def get(self, reg, mark_read=True):
+    def get(self, reg, mark_read=True) -> 'Int32':
        """
        Retuns the contents of register reg
        :param reg: The register name
        :param mark_read: If the register should be markes as "last read" (only used internally)
        :return: The contents of register reg
        """
-        #if reg not in Registers.all_registers():
+        # if reg not in Registers.all_registers():
        #    raise InvalidRegisterException(reg)
        if reg == 'fp':
            reg = 's0'
--- a/riscemu/syscall.py
+++ b/riscemu/syscall.py
@ -4,23 +4,19 @@ RiscEmu (c) 2021 Anton Lydike
 SPDX-License-Identifier: MIT
 """

+import sys
 from dataclasses import dataclass
 from typing import Dict, IO
-import sys

 from .helpers import *

-import riscemu
-
-import typing
-
 if typing.TYPE_CHECKING:
-    from . import CPU
+    from riscemu.CPU import UserModeCPU

 SYSCALLS = {
-    63:   'read',
-    64:   'write',
-    93:   'exit',
+    63: 'read',
+    64: 'write',
+    93: 'exit',
    1024: 'open',
    1025: 'close',
 }
@ -35,6 +31,7 @@ OPEN_MODES = {
 }
 """All available file open modes"""

+
@dataclass(frozen=True)
 class Syscall:
    """
@ -42,7 +39,7 @@ class Syscall:
    """
    id: int
    """The syscall number (e.g. 64 - write)"""
-    cpu: 'riscemu.CPU'
+    cpu: 'UserModeCPU'
    """The CPU object that created the syscall"""

    @property
@ -55,7 +52,7 @@ class Syscall:
        )

    def ret(self, code):
-        self.cpu.regs.set('a0', code)
+        self.cpu.regs.set('a0', Int32(code))


 def get_syscall_symbols():
@ -94,9 +91,9 @@ class SyscallInterface:
        read syscall (63): read from file no a0, into addr a1, at most a2 bytes
        on return a0 will be the number of read bytes or -1 if an error occured
        """
-        fileno = scall.cpu.regs.get('a0')
-        addr = scall.cpu.regs.get('a1')
-        size = scall.cpu.regs.get('a2')
+        fileno = scall.cpu.regs.get('a0').unsigned_value
+        addr = scall.cpu.regs.get('a1').unsigned_value
+        size = scall.cpu.regs.get('a2').unsigned_value
        if fileno not in self.open_files:
            scall.cpu.regs.set('a0', -1)
            return
@ -116,9 +113,9 @@ class SyscallInterface:
        write syscall (64): write a2 bytes from addr a1 into fileno a0
        on return a0 will hold the number of bytes written or -1 if an error occured
        """
-        fileno = scall.cpu.regs.get('a0')
-        addr = scall.cpu.regs.get('a1')
-        size = scall.cpu.regs.get('a2')
+        fileno = scall.cpu.regs.get('a0').unsigned_value
+        addr = scall.cpu.regs.get('a1').unsigned_value
+        size = scall.cpu.regs.get('a2').unsigned_value
        if fileno not in self.open_files:
            return scall.ret(-1)

@ -145,13 +142,14 @@ class SyscallInterface:

        Requires running with flag scall-fs
        """
-        if not scall.cpu.conf.scall_fs:
+        # FIXME: this should be toggleable in a global setting or something
+        if True:
            print(FMT_SYSCALL + '[Syscall] open: opening files not supported without scall-fs flag!' + FMT_NONE)
            return scall.ret(-1)

-        mode = scall.cpu.regs.get('a0')
-        addr = scall.cpu.regs.get('a1')
-        size = scall.cpu.regs.get('a2')
+        mode = scall.cpu.regs.get('a0').unsigned_value
+        addr = scall.cpu.regs.get('a1').unsigned_value
+        size = scall.cpu.regs.get('a2').unsigned_value

        mode_st = OPEN_MODES.get(mode, )
        if mode_st == -1:
@ -178,7 +176,7 @@ class SyscallInterface:

        return -1 if an error was encountered, otherwise returns 0
        """
-        fileno = scall.cpu.regs.get('a0')
+        fileno = scall.cpu.regs.get('a0').unsigned_value
        if fileno not in self.open_files:
            print(FMT_SYSCALL + '[Syscall] close: unknown fileno {}!'.format(fileno) + FMT_NONE)
            return scall.ret(-1)
@ -192,11 +190,11 @@ class SyscallInterface:
        """
        Exit syscall. Exits the system with status code a0
        """
-        scall.cpu.exit = True
-        scall.cpu.exit_code = scall.cpu.regs.get('a0')
+        scall.cpu.halted = True
+        scall.cpu.exit_code = scall.cpu.regs.get('a0').value

    def __repr__(self):
        return "{}(\n\tfiles={}\n)".format(
            self.__class__.__name__,
            self.open_files
-        )
+        )
--- a/riscemu/tokenizer.py
+++ b/riscemu/tokenizer.py
@ -0,0 +1,135 @@
+"""
+RiscEmu (c) 2021 Anton Lydike
+
+SPDX-License-Identifier: MIT
+"""
+
+import re
+from dataclasses import dataclass
+from enum import Enum, auto
+from typing import List, Iterable
+
+from riscemu.decoder import RISCV_REGS
+from riscemu.types.exceptions import ParseException
+
+# Character sequences that start a line comment.
+LINE_COMMENT_STARTERS = ('#', ';', '//')
+# One or more whitespace characters.
+WHITESPACE_PATTERN = re.compile(r'\s+')
+# A memory operand written as `<address>(<register>)`: group 1 is the part in front of the
+# parentheses (hexadecimal, decimal or binary number, or a symbol name), group 2 the register name.
+# NOTE(review): the ranges `A-f` and `A-z` also cover the ASCII characters between 'Z' and 'a'
+# ('[', '\', ']', '^', '_' and '`'), and `A-f` covers 'G' to 'Z' as well - confirm this is intended.
+MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+|[A-z0-9_-]+)\(([A-z]+[0-9]{0,2})\)$')
+# Alias for the decoder's RISCV_REGS.
+REGISTER_NAMES = RISCV_REGS
+
+
+class TokenType(Enum):
+    """
+    The kinds of tokens produced by the tokenizer.
+    """
+    # a ',' between arguments
+    COMMA = auto()
+    # an argument following the first token of a line
+    ARGUMENT = auto()
+    # first token of a line, starting with '.'
+    PSEUDO_OP = auto()
+    # first token of a line that is neither a pseudo op nor a label
+    INSTRUCTION_NAME = auto()
+    # marks the end of a tokenized line
+    NEWLINE = auto()
+    # first token of a line, ending with ':'
+    LABEL = auto()
+
+
+@dataclass(frozen=True)
+class Token:
+    """
+    A single token: its type together with the text it carries.
+    """
+    type: TokenType
+    value: str
+
+    def __str__(self):
+        """
+        Compact rendering: newline and comma tokens have a fixed text, every other token is shown
+        as the first three letters of its type name followed by its value in parentheses.
+        """
+        kind = self.type
+        if kind == TokenType.NEWLINE:
+            text = '\\n'
+        elif kind == TokenType.COMMA:
+            text = ', '
+        else:
+            text = '{}({})'.format(kind.name[0:3], self.value)
+        return text
+
+
+# Shared instances for the two token types whose value is always the same.
+NEWLINE = Token(TokenType.NEWLINE, '\n')
+COMMA = Token(TokenType.COMMA, ',')
+
+
+def tokenize(input: Iterable[str]) -> Iterable[Token]:
+    """
+    Turn lines of assembly source into a stream of tokens.
+
+    Line comments are removed first. Lines that are empty afterwards produce no tokens at all,
+    every other line produces its tokens followed by a NEWLINE token.
+
+    Comment starters are not quote aware: a comment starter inside a quoted string also cuts the line.
+
+    :param input: The source lines
+    :return: The tokens of all lines, in order
+    """
+    for line in input:
+        for line_comment_start in LINE_COMMENT_STARTERS:
+            if line_comment_start in line:
+                line = line[:line.index(line_comment_start)]
+        # str.strip returns a new string, so the result has to be re-bound. Without the assignment,
+        # whitespace-only and comment-only lines were never recognized as empty.
+        line = line.strip(' \t\n')
+        if not line:
+            continue
+
+        parts = [part for part in split_whitespace_respecting_quotes(line) if part]
+
+        yield from parse_line(parts)
+        yield NEWLINE
+
+
+def parse_line(parts: List[str]) -> Iterable[Token]:
+    """
+    Produce the tokens for the parts of a single line.
+
+    The first part decides the kind of line: a leading '.' makes it a pseudo op, otherwise a
+    trailing ':' makes it a label, anything else is an instruction name. The parts after a label
+    are parsed as a line of their own. All remaining parts are commas or arguments.
+
+    :param parts: The non-empty parts of the line
+    :return: The tokens of the line, without a trailing NEWLINE
+    """
+    if not parts:
+        return ()
+    head = parts[0]
+    rest = parts[1:]
+
+    # the pseudo op check has precedence, so '.name:' is a pseudo op and not a label
+    if head[0] != '.' and head[-1] == ':':
+        yield Token(TokenType.LABEL, head)
+        yield from parse_line(rest)
+        return
+
+    head_type = TokenType.PSEUDO_OP if head[0] == '.' else TokenType.INSTRUCTION_NAME
+    yield Token(head_type, head)
+
+    for operand in rest:
+        if operand == ',':
+            yield COMMA
+        else:
+            yield from parse_arg(operand)
+
+
+def parse_arg(arg: str) -> Iterable[Token]:
+    """
+    Produce the tokens for a single argument.
+
+    An argument matching MEMORY_ADDRESS_PATTERN yields two ARGUMENT tokens: first the lower-cased
+    register, then the part in front of the parentheses. Any other argument yields one ARGUMENT
+    token. A trailing ',' is split off and yielded as a COMMA token at the end.
+
+    :param arg: The argument text, optionally ending with ','
+    :raises ParseException: If the register of a memory operand is not in RISCV_REGS
+    """
+    has_trailing_comma = arg[-1] == ','
+    if has_trailing_comma:
+        arg = arg[:-1]
+
+    memory_operand = MEMORY_ADDRESS_PATTERN.match(arg)
+    if not memory_operand:
+        yield Token(TokenType.ARGUMENT, arg)
+    else:
+        register = memory_operand.group(2).lower()
+        if register not in RISCV_REGS:
+            raise ParseException(f'"{register}" is not a valid register!')
+        yield Token(TokenType.ARGUMENT, register)
+        yield Token(TokenType.ARGUMENT, memory_operand.group(1))
+
+    if has_trailing_comma:
+        yield COMMA
+
+
+def print_tokens(tokens: Iterable[Token]):
+    """
+    Print the tokens, starting a new output line after every NEWLINE token, then flush.
+    """
+    for token in tokens:
+        line_end = '\n' if token == NEWLINE else ''
+        print(token, end=line_end)
+    # prints nothing, only flushes the output
+    print("", flush=True, end="")
+
+
+def split_whitespace_respecting_quotes(line: str) -> Iterable[str]:
+    """
+    Split a line at spaces, tabs and newlines, keeping quoted text together.
+
+    Text enclosed in single or double quotes is yielded as one part without the quotes, even if it
+    is empty. An opening quote also ends the part in front of it. Text after an unterminated quote
+    is yielded as the last part.
+
+    :param line: The line to split
+    :return: The parts of the line, in order
+    """
+    open_quote = ""
+    chars = []
+    for ch in line:
+        if open_quote:
+            if ch == open_quote:
+                # closing quote: the quoted text is a part of its own, even when empty
+                yield "".join(chars)
+                chars = []
+                open_quote = ""
+            else:
+                chars.append(ch)
+        elif ch in ('"', "'"):
+            if chars:
+                yield "".join(chars)
+                chars = []
+            open_quote = ch
+        elif ch in (' ', '\t', '\n'):
+            if chars:
+                yield "".join(chars)
+                chars = []
+        else:
+            chars.append(ch)
+
+    if chars:
+        yield "".join(chars)
--- a/riscemu/types/init.py
+++ b/riscemu/types/init.py
@ -0,0 +1,29 @@
+from typing import Dict
+import re
+
+# define some base type aliases so we can keep track of absolute and relative addresses
+T_RelativeAddress = int
+T_AbsoluteAddress = int
+
+# parser options are just dictionaries with arbitrary values
+# NOTE(review): `any` is the builtin function here, typing.Any is presumably what was meant - confirm
+T_ParserOpts = Dict[str, any]
+
+# matches one or more digits followed by 'f' or 'b' (e.g. `1f`, `2b`)
+NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
+
+# base classes
+from .flags import MemoryFlags
+from .int32 import UInt32, Int32
+from .instruction import Instruction
+from .instruction_context import InstructionContext
+from .memory_section import MemorySection
+from .program import Program
+from .program_loader import ProgramLoader
+from .cpu import CPU
+from .simple_instruction import SimpleInstruction
+from .instruction_memory_section import InstructionMemorySection
+from .binary_data_memory_section import BinaryDataMemorySection
+
+# exceptions
+from .exceptions import ParseException, NumberFormatException, MemoryAccessException, OutOfMemoryException, \
+    LinkerException, LaunchDebuggerException, RiscemuBaseException, InvalidRegisterException, \
+    InvalidAllocationException, InvalidSyscallException, UnimplementedInstruction
--- a/riscemu/types/binary_data_memory_section.py
+++ b/riscemu/types/binary_data_memory_section.py
@ -0,0 +1,29 @@
+from . import MemorySection, InstructionContext, MemoryFlags, T_RelativeAddress, Instruction
+from ..types.exceptions import MemoryAccessException
+
+
+class BinaryDataMemorySection(MemorySection):
+    """
+    A memory section backed by a bytearray. It supports byte reads and writes, but no instruction fetches.
+    """
+
+    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0,
+                 flags: MemoryFlags = None):
+        """
+        :param data: The content of the section, its length becomes the section size
+        :param name: The name of the section
+        :param context: The instruction context of the section
+        :param owner: The owner of the section
+        :param base: The base of the section
+        :param flags: The memory flags, MemoryFlags(False, False) if omitted
+        """
+        if flags is None:
+            flags = MemoryFlags(False, False)
+        self.name = name
+        self.base = base
+        self.context = context
+        self.size = len(data)
+        self.flags = flags
+        self.data = data
+        self.owner = owner
+
+    def _ensure_in_bounds(self, offset: T_RelativeAddress, size: int, operation: str):
+        """
+        Raise a MemoryAccessException if the access reaches past the end of the section.
+        """
+        if offset + size > self.size:
+            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, operation)
+
+    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
+        """
+        Return size bytes starting at offset.
+
+        :raises MemoryAccessException: If the access reaches past the end of the section
+        """
+        self._ensure_in_bounds(offset, size, 'read')
+        return self.data[offset:offset + size]
+
+    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
+        """
+        Store the first size bytes of data at offset.
+
+        :raises MemoryAccessException: If the access reaches past the end of the section, or data holds
+                                       fewer than size bytes
+        """
+        self._ensure_in_bounds(offset, size, 'write')
+        payload = data[0:size]
+        if len(payload) != size:
+            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
+        self.data[offset:offset + size] = payload
+
+    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
+        """
+        Always fails, this section type does not provide instructions.
+
+        :raises MemoryAccessException: On every call
+        """
+        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
+                                    offset, 4, 'instruction fetch')
--- a/riscemu/types/cpu.py
+++ b/riscemu/types/cpu.py
@ -0,0 +1,107 @@
+import typing
+from abc import ABC, abstractmethod
+from typing import List, Type, Callable, Set, Dict
+
+from ..registers import Registers
+from ..config import RunConfig
+from ..colors import FMT_RED, FMT_NONE
+from . import T_AbsoluteAddress, Instruction, Program, ProgramLoader
+
+
+class CPU(ABC):
+    """
+    Abstract base class of the emulated CPUs.
+
+    It holds the registers, the MMU, the program counter, the cycle count, the table of executable
+    instructions and the run configuration. Subclasses provide step(), run() and get_loaders().
+    """
+    # static cpu configuration
+    INS_XLEN: int = 4
+
+    # housekeeping variables
+    regs: Registers
+    mmu: 'MMU'
+    pc: T_AbsoluteAddress
+    cycle: int
+    halted: bool
+
+    # debugging context
+    debugger_active: bool
+
+    # instruction information
+    instructions: Dict[str, Callable[[Instruction], None]]
+    instruction_sets: Set['InstructionSet']
+
+    # configuration
+    conf: RunConfig
+
+    def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig):
+        """
+        :param mmu: The MMU used by this CPU
+        :param instruction_sets: The instruction set classes to instantiate and load
+        :param conf: The run configuration
+        """
+        self.mmu = mmu
+        self.regs = Registers()
+        self.conf = conf
+
+        self.instruction_sets = set()
+        self.instructions = {}
+
+        # every instruction set is instantiated with this CPU and contributes its instructions
+        for instruction_set_class in instruction_sets:
+            instruction_set = instruction_set_class(self)
+            self.instructions.update(instruction_set.load())
+            self.instruction_sets.add(instruction_set)
+
+        self.halted = False
+        self.cycle = 0
+        self.pc = 0
+        self.debugger_active = False
+
+    def run_instruction(self, ins: Instruction):
+        """
+        Execute a single instruction
+
+        :param ins: The instruction to execute
+        :raises RuntimeError: If no loaded instruction set provides the instruction
+        """
+        if ins.name not in self.instructions:
+            # this should never be reached, as unknown instructions are imparseable
+            raise RuntimeError("Unknown instruction: {}".format(ins))
+        handler = self.instructions[ins.name]
+        handler(ins)
+
+    def load_program(self, program: Program):
+        """
+        Hand the program to the MMU for loading.
+        """
+        self.mmu.load_program(program)
+
+    def __repr__(self):
+        """
+        Returns a representation of the CPU and some of its state.
+        """
+        instruction_set_names = " ".join(s.name for s in self.instruction_sets)
+        return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format(
+            self.__class__.__name__,
+            self.pc,
+            self.cycle,
+            self.halted,
+            instruction_set_names
+        )
+
+    @abstractmethod
+    def step(self, verbose=False):
+        """
+        To be implemented by subclasses.
+        """
+
+    @abstractmethod
+    def run(self, verbose=False):
+        """
+        To be implemented by subclasses.
+        """
+
+    def launch(self, program: Program, verbose: bool = False):
+        """
+        Set the program counter to the entrypoint of the program and run.
+
+        If the program is not among the programs of the MMU, an error is printed and nothing is run.
+
+        :param program: The program to launch
+        :param verbose: Passed on to run()
+        """
+        if program in self.mmu.programs:
+            self.pc = program.entrypoint
+            self.run(verbose)
+            return
+
+        print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE)
+
+    @classmethod
+    @abstractmethod
+    def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
+        """
+        The program loaders supported by this CPU, to be implemented by subclasses.
+        """
+
+    def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]:
+        """
+        Return the loader whose can_parse() gives the highest value for the file.
+        """
+        def suitability(loader):
+            return loader.can_parse(file_name)
+
+        return max(self.get_loaders(), key=suitability)
+
+    @property
+    def sections(self):
+        """
+        The sections of the MMU.
+        """
+        return self.mmu.sections
+
+    @property
+    def programs(self):
+        """
+        The programs of the MMU.
+        """
+        return self.mmu.programs
--- a/riscemu/types/exceptions.py
+++ b/riscemu/types/exceptions.py
@ -4,13 +4,12 @@ RiscEmu (c) 2021 Anton Lydike
 SPDX-License-Identifier: MIT
 """

-import typing
-
 from abc import abstractmethod
-from .colors import *
+from ..colors import *
+import typing

 if typing.TYPE_CHECKING:
-    from .Executable import LoadedInstruction
+    from . import Instruction


 class RiscemuBaseException(BaseException):
@ -18,12 +17,15 @@ class RiscemuBaseException(BaseException):
    def message(self):
        pass

+    def print_stacktrace(self):
+        import traceback
+        traceback.print_exception(type(self), self, self.__traceback__)

 # Parsing exceptions:

 class ParseException(RiscemuBaseException):
    def __init__(self, msg, data=None):
-        super().__init__()
+        super().__init__(msg, data)
        self.msg = msg
        self.data = data

@ -116,13 +118,15 @@ class InvalidAllocationException(RiscemuBaseException):


 class UnimplementedInstruction(RiscemuBaseException):
-    def __init__(self, ins: 'LoadedInstruction'):
+    def __init__(self, ins: 'Instruction', context = None):
        self.ins = ins
+        self.context = context

    def message(self):
-        return FMT_CPU + "{}({})".format(
+        return FMT_CPU + "{}({}{})".format(
            self.__class__.__name__,
-            repr(self.ins)
+            repr(self.ins),
+            ', context={}'.format(self.context) if self.context is not None else ''
        ) + FMT_NONE


--- a/riscemu/types/flags.py
+++ b/riscemu/types/flags.py
@ -0,0 +1,13 @@
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class MemoryFlags:
+    """
+    Immutable pair of flags describing whether memory is read-only and whether it is executable.
+    """
+    read_only: bool
+    executable: bool
+
+    def __repr__(self):
+        """
+        Render the flags as three characters: 'r', then '-' (read-only) or 'w', then 'x' (executable) or '-'.
+        """
+        write_mark = '-' if self.read_only else 'w'
+        exec_mark = 'x' if self.executable else '-'
+        return "r{}{}".format(write_mark, exec_mark)
--- a/riscemu/types/instruction.py
+++ b/riscemu/types/instruction.py
@ -0,0 +1,31 @@
+from abc import ABC, abstractmethod
+from typing import Tuple
+
+
+class Instruction(ABC):
+    """
+    Abstract base class of all instructions: a name plus its arguments.
+
+    Subclasses define how immediates and registers are extracted from the arguments.
+    """
+    name: str
+    args: tuple
+
+    @abstractmethod
+    def get_imm(self, num: int) -> int:
+        """
+        parse and get immediate argument
+
+        :param num: The position of the argument
+        """
+        pass
+
+    @abstractmethod
+    def get_imm_reg(self, num: int) -> Tuple[int, str]:
+        """
+        parse and get an argument imm(reg)
+
+        :param num: The position of the argument
+        """
+        pass
+
+    @abstractmethod
+    def get_reg(self, num: int) -> str:
+        """
+        parse and get an register argument
+
+        :param num: The position of the argument
+        """
+        pass
+
+    def __repr__(self):
+        """
+        Render the instruction as its name followed by the comma separated arguments.
+        """
+        # str.join only accepts strings, while args may hold other values (e.g. integers)
+        return "{} {}".format(self.name, ", ".join(map(str, self.args)))
--- a/riscemu/types/instruction_context.py
+++ b/riscemu/types/instruction_context.py
@ -0,0 +1,53 @@
+from collections import defaultdict
+from typing import Dict, List, Optional
+
+from .exceptions import ParseException
+from ..types import T_AbsoluteAddress, T_RelativeAddress, NUMBER_SYMBOL_PATTERN
+
+
+class InstructionContext:
+    """
+    The label information of an instruction block, used to resolve symbols to addresses.
+    """
+
+    base_address: T_AbsoluteAddress
+    """
+    The address where the instruction block is placed
+    """
+
+    labels: Dict[str, T_RelativeAddress]
+    """
+    This dictionary maps all labels to their relative position of the instruction block
+    """
+
+    numbered_labels: Dict[str, List[T_RelativeAddress]]
+    """
+    This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses
+    where the label was placed
+    """
+
+    global_symbol_dict: Dict[str, T_AbsoluteAddress]
+    """
+    A reference to the MMU's global symbol dictionary for access to global symbols
+    """
+
+    def __init__(self):
+        self.labels = dict()
+        self.numbered_labels = defaultdict(list)
+        self.base_address = 0
+        self.global_symbol_dict = dict()
+
+    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]:
+        """
+        Look up the address of a symbol.
+
+        Numbered symbols (matching NUMBER_SYMBOL_PATTERN) resolve to the closest placement of that
+        number below address_at for direction 'b', and above it otherwise, with the base address
+        added. Any other symbol is looked up in the local labels first and in the global symbols
+        second, and its stored value is returned unchanged.
+
+        :param symbol: The symbol to resolve
+        :param address_at: The block-relative address the symbol is referenced from, required for numbered symbols
+        :return: The address of the symbol, or None if it is unknown
+        :raises ParseException: If a numbered symbol is resolved without address_at
+        """
+        if not NUMBER_SYMBOL_PATTERN.match(symbol):
+            # local labels take precedence over global symbols
+            if symbol in self.labels:
+                return self.labels.get(symbol, None)
+            return self.global_symbol_dict.get(symbol, None)
+
+        if address_at is None:
+            raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))
+
+        number, direction = symbol[:-1], symbol[-1]
+        placements = self.numbered_labels.get(number, [])
+        if direction == 'b':
+            candidates = (addr + self.base_address for addr in placements if addr < address_at)
+            return max(candidates, default=None)
+        candidates = (addr + self.base_address for addr in placements if addr > address_at)
+        return min(candidates, default=None)
+
--- a/riscemu/types/instruction_memory_section.py
+++ b/riscemu/types/instruction_memory_section.py
@ -0,0 +1,27 @@
+from typing import List
+
+from . import MemorySection, Instruction, InstructionContext, MemoryFlags, T_RelativeAddress
+from .exceptions import MemoryAccessException
+
+
+class InstructionMemorySection(MemorySection):
+    """
+    A memory section holding a list of instructions, four bytes of address space per instruction.
+
+    Only instruction fetches are supported; byte reads and writes always fail.
+    """
+
+    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str,
+                 base: int = 0):
+        """
+        :param instructions: The instructions of the section
+        :param name: The name of the section
+        :param context: The instruction context of the section
+        :param owner: The owner of the section
+        :param base: The base of the section
+        """
+        self.name = name
+        self.base = base
+        self.context = context
+        self.size = 4 * len(instructions)
+        self.flags = MemoryFlags(True, True)
+        self.instructions = instructions
+        self.owner = owner
+
+    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
+        """
+        :raises MemoryAccessException: On every call
+        """
+        raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')
+
+    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
+        """
+        :raises MemoryAccessException: On every call
+        """
+        raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write')
+
+    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
+        """
+        Return the instruction stored at offset.
+
+        :param offset: Position relative to the start of this section, a multiple of four
+        :raises MemoryAccessException: If offset is not a multiple of four
+        """
+        index, misalignment = divmod(offset, 4)
+        if misalignment != 0:
+            raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')
+        return self.instructions[index]
+
--- a/riscemu/types/int32.py
+++ b/riscemu/types/int32.py
@ -0,0 +1,273 @@
+from typing import Union
+from ctypes import c_int32, c_uint32
+
+
+class Int32:
+    """
+    This class implements 32bit signed integers (see :class:`UInt32` for unsigned integers)
+
+    It implements basically all mathematical dunder magic methods (__add__, __sub__, etc.)
+    Every result is stored in a ctypes integer again, so it wraps around to 32 bits.
+
+    You can use it just like you would any other integer, just be careful when passing it
+    to functions which actually expect an integer and not a Int32.
+    """
+    _type = c_int32
+    __slots__ = ('_val',)
+
+    def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0):
+        """
+        :param val: the initial value. Bytes are interpreted as little endian.
+        :raises RuntimeError: if val has an unsupported type
+        """
+        if isinstance(val, (bytes, bytearray)):
+            # only a full four byte input is read as signed, shorter inputs are zero extended
+            signed = len(val) == 4 and self._type == c_int32
+            self._val = self.__class__._type(int.from_bytes(val, 'little', signed=signed))
+        elif isinstance(val, (c_uint32, c_int32, Int32)):
+            # always create a fresh ctypes object, so that this instance never shares
+            # mutable state with the object it was created from
+            self._val = self.__class__._type(val.value)
+        elif isinstance(val, int):
+            self._val = self.__class__._type(val)
+        else:
+            raise RuntimeError(
+                "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val)
+            )
+
+    # binary operators, each returns a new instance of the class of the left operand
+
+    def __add__(self, other: Union['Int32', int]):
+        if isinstance(other, Int32):
+            other = other.value
+
+        return self.__class__(self._val.value + other)
+
+    def __sub__(self, other: Union['Int32', int]):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.__class__(self._val.value - other)
+
+    def __mul__(self, other: Union['Int32', int]):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.__class__(self._val.value * other)
+
+    def __truediv__(self, other):
+        # there are no fractions here, true division is the same as floor division
+        return self // other
+
+    def __floordiv__(self, other):
+        # note: this is python floor division (rounds towards negative infinity)
+        if isinstance(other, Int32):
+            other = other.value
+        return self.__class__(self.value // other)
+
+    def __mod__(self, other: Union['Int32', int]):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.__class__(self._val.value % other)
+
+    def __and__(self, other: Union['Int32', int]):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.__class__(self._val.value & other)
+
+    def __or__(self, other: Union['Int32', int]):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.__class__(self._val.value | other)
+
+    def __xor__(self, other: Union['Int32', int]):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.__class__(self._val.value ^ other)
+
+    def __lshift__(self, other: Union['Int32', int]):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.__class__(self.value << other)
+
+    def __rshift__(self, other: Union['Int32', int]):
+        # shifts the python integer, see shift_right_logical for the logical variant
+        if isinstance(other, Int32):
+            other = other.value
+        return self.__class__(self.value >> other)
+
+    def __eq__(self, other: Union['Int32', int]):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.value == other
+
+    def __neg__(self):
+        return self.__class__(-self._val.value)
+
+    def __abs__(self):
+        return self.__class__(abs(self.value))
+
+    def __bytes__(self):
+        # bytes() only accepts an actual bytes object from __bytes__, returning the
+        # bytearray of to_bytes() directly would raise a TypeError
+        return bytes(self.to_bytes(4))
+
+    def __repr__(self):
+        return '{}({})'.format(self.__class__.__name__, self.value)
+
+    def __str__(self):
+        return str(self.value)
+
+    def __format__(self, format_spec):
+        return self.value.__format__(format_spec)
+
+    def __hash__(self):
+        return hash(self.value)
+
+    def __gt__(self, other):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.value > other
+
+    def __lt__(self, other):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.value < other
+
+    def __le__(self, other):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.value <= other
+
+    def __ge__(self, other):
+        if isinstance(other, Int32):
+            other = other.value
+        return self.value >= other
+
+    def __bool__(self):
+        return bool(self.value)
+
+    def __cmp__(self, other):
+        """
+        Three way comparison, returns -1, 0 or 1.
+
+        Python 3 integers have no __cmp__ method to delegate to, so the result is
+        computed from the rich comparisons instead.
+        """
+        if isinstance(other, Int32):
+            other = other.value
+        return (self.value > other) - (self.value < other)
+
+    # right handed binary operators
+
+    def __radd__(self, other):
+        return self + other
+
+    def __rsub__(self, other):
+        return self.__class__(other) - self
+
+    def __rmul__(self, other):
+        return self * other
+
+    def __rtruediv__(self, other):
+        return self.__class__(other) // self
+
+    def __rfloordiv__(self, other):
+        return self.__class__(other) // self
+
+    def __rmod__(self, other):
+        return self.__class__(other) % self
+
+    def __rand__(self, other):
+        return self.__class__(other) & self
+
+    def __ror__(self, other):
+        return self.__class__(other) | self
+
+    def __rxor__(self, other):
+        return self.__class__(other) ^ self
+
+    @property
+    def value(self) -> int:
+        """
+        The value represented by this Integer
+        :return: the value as a python int
+        """
+        return self._val.value
+
+    def unsigned(self) -> 'UInt32':
+        """
+        Convert to an unsigned representation. See :class:Uint32
+        :return: a UInt32 created from this instance
+        """
+        return UInt32(self)
+
+    def to_bytes(self, bytes: int = 4) -> bytearray:
+        """
+        Convert to a bytearray of length :param:bytes
+
+        :param bytes: The length of the bytearray (the lowest bytes are kept, at most 4)
+        :return: A little-endian representation of the contained integer
+        """
+        return bytearray(self.unsigned_value.to_bytes(4, 'little'))[0:bytes]
+
+    def signed(self) -> 'Int32':
+        """
+        Convert to a signed representation. See :class:Int32
+        :return: this instance if it is already an Int32, a new Int32 otherwise
+        """
+        if self.__class__ == Int32:
+            return self
+        return Int32(self)
+
+    @property
+    def unsigned_value(self):
+        """
+        Return the value interpreted as an unsigned integer
+        :return: the value as a python int in the unsigned 32 bit range
+        """
+        return c_uint32(self.value).value
+
+    def shift_right_logical(self, ammount: Union['Int32', int]) -> 'Int32':
+        """
+        This function implements logical right shifts, meaning that the sign bit is shifted as well.
+
+        This is equivalent to (self.value % 0x100000000) >> ammount
+
+        :param ammount: Number of positions to shift
+        :return: A new Int32 object representing the shifted value (keeps the signed-ness of the source)
+        """
+        if isinstance(ammount, Int32):
+            ammount = ammount.value
+        return self.__class__((self.value % 0x100000000) >> ammount)
+
+    def __int__(self):
+        return self.value
+
+    def __hex__(self):
+        # python 2 style hook, python 3's hex() does not call this
+        return hex(self.value)
+
+    @classmethod
+    def sign_extend(cls, data: Union[bytes, bytearray, int], bits: int):
+        """
+        Create an instance of Int32 by sign extending :param:bits bits from :param:data
+        to 32 bits
+
+        Only the lowest :param:bits bits of data are used. A warning is printed when a
+        positive input has bits set above that range.
+
+        :param data: The source data (bytes are interpreted as little endian)
+        :param bits: The number of bits in the source data
+        :return: An instance of Int32, holding the sign-extended value
+        """
+        if isinstance(data, (bytes, bytearray)):
+            data = int.from_bytes(data, 'little')
+        if data >> (bits - 1) > 1:
+            print("overflow in Int32.sext!")
+        # keep only the low `bits` bits and look at the actual sign bit. Testing the shifted
+        # value for truthiness would treat stray upper bits as a sign.
+        data &= (1 << bits) - 1
+        if data & (1 << (bits - 1)):
+            data -= 1 << bits
+        return cls(data)
+
+
+class UInt32(Int32):
+    """
+    An unsigned version of :class:Int32.
+
+    Values are stored in a ``c_uint32``, everything else is inherited from Int32.
+    """
+    _type = c_uint32
+
+    def unsigned(self) -> 'UInt32':
+        """
+        Return the unsigned representation, which is this instance itself
+        :return: self (no new object is created)
+        """
+        return self
+
+    @property
+    def unsigned_value(self) -> int:
+        """
+        The contained value, which is already unsigned
+        :return: the value as a python int
+        """
+        return self._val.value
+
+    def shift_right_logical(self, ammount: Union['Int32', int]) -> 'UInt32':
+        """
+        see :meth:`Int32.shift_right_logical <Int32.shift_right_logical>`
+
+        For unsigned values this is the same as the regular right shift.
+
+        :param ammount: Number of positions to shift
+        :return: A new UInt32 object representing the shifted value
+        """
+        return self >> ammount
--- a/riscemu/types/memory_section.py
+++ b/riscemu/types/memory_section.py
@ -0,0 +1,88 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Optional
+
+from ..colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE
+from ..helpers import format_bytes
+from . import MemoryFlags, T_AbsoluteAddress, InstructionContext, T_RelativeAddress, Instruction
+
+
+@dataclass
+class MemorySection(ABC):
+    """
+    Abstract base class for a named region of memory.
+
+    Subclasses implement raw access (:meth:`read`, :meth:`write`) and instruction
+    fetches (:meth:`read_ins`). All offsets are relative to ``base``.
+    """
+    # name of the section, shown in the repr
+    name: str
+    # access flags, only ``flags.executable`` is consulted in this class
+    flags: MemoryFlags
+    # size of the section in bytes
+    size: int
+    # address of the first byte of the section
+    base: T_AbsoluteAddress
+    # owner of the section, shown in the repr
+    owner: str
+    context: InstructionContext
+
+    @property
+    def end(self):
+        """
+        The first address after this section (base + size)
+        """
+        return self.base + self.size
+
+    @abstractmethod
+    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
+        """
+        Read size bytes starting at offset
+        """
+        pass
+
+    @abstractmethod
+    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
+        """
+        Write size bytes of data starting at offset
+        """
+        pass
+
+    @abstractmethod
+    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
+        """
+        Read the instruction located at offset
+        """
+        pass
+
+    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
+             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
+        """
+        Print the contents of this section to stdout.
+
+        When end is omitted, a window of roughly ``rows`` rows around start is printed and
+        start itself is highlighted.
+
+        :param start: offset where the dump starts (or the highlighted offset if end is None)
+        :param end: offset where the dump ends (exclusive)
+        :param fmt: format passed on to format_bytes
+        :param bytes_per_row: bytes printed per row (forced to 4 for executable sections)
+        :param rows: number of rows in the window when end is None
+        :param group: group size passed on to format_bytes
+        """
+        if self.flags.executable:
+            # executable sections are printed one instruction (4 bytes) per row
+            bytes_per_row = 4
+        highlight = None
+        if end is None:
+            # no explicit range: show rows // 2 rows before and after start
+            end = min(start + (bytes_per_row * (rows // 2)), self.size - 1)
+            highlight = start
+            start = max(0, start - (bytes_per_row * (rows // 2)))
+
+        if self.flags.executable:
+            print(FMT_MEM + "{}, viewing {} instructions:".format(
+                self, (end - start) // 4
+            ) + FMT_NONE)
+
+            for addr in range(start, end, 4):
+                if addr == highlight:
+                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
+                print("0x{:04x}: {}{}".format(
+                    self.base + addr, self.read_ins(addr), FMT_NONE
+                ))
+        else:
+            print(FMT_MEM + "{}, viewing {} bytes:".format(
+                self, (end - start)
+            ) + FMT_NONE)
+
+            # end rounded down to a multiple of bytes_per_row
+            aligned_end = end - (end % bytes_per_row) if end % bytes_per_row != 0 else end
+
+            # NOTE(review): rows begin at start, which is not rounded to bytes_per_row, while
+            # aligned_end is - verify that the last full row cannot overlap the partial row below
+            for addr in range(start, aligned_end, bytes_per_row):
+                hi_ind = (highlight - addr) // group if highlight is not None else -1
+                print("0x{:04x}: {}{}".format(
+                    self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE
+                ))
+
+            # print the remaining bytes which do not fill a whole row
+            if aligned_end != end:
+                hi_ind = (highlight - aligned_end) // group if highlight is not None else -1
+                print("0x{:04x}: {}{}".format(
+                    self.base + aligned_end, format_bytes(
+                        self.read(aligned_end, end % bytes_per_row), fmt, group, hi_ind
+                    ), FMT_NONE
+                ))
+
+    def dump_all(self, *args, **kwargs):
+        """
+        Dump the whole section, additional arguments are passed on to :meth:`dump`
+        """
+        self.dump(0, self.size, *args, **kwargs)
+
+    def __repr__(self):
+        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
+            self.__class__.__name__,
+            self.name,
+            self.base,
+            self.size,
+            self.flags,
+            self.owner
+        )
--- a/riscemu/types/program.py
+++ b/riscemu/types/program.py
@ -0,0 +1,104 @@
+from typing import List, Optional, Set
+
+from ..colors import FMT_RED, FMT_BOLD, FMT_NONE, FMT_MEM
+from ..helpers import get_section_base_name
+from . import InstructionContext, T_AbsoluteAddress, MemorySection
+
+
+class Program:
+    """
+    This represents a collection of sections which together form an executable program
+
+    When you want to create a program which can be located anywhere in memory, set base to None,
+    this signals the other components, that this is relocatable. Set the base of each section to
+    the offset in the program, and everything will be taken care of for you.
+
+    """
+    name: str
+    context: InstructionContext
+    global_labels: Set[str]
+    relative_labels: Set[str]
+    sections: List[MemorySection]
+    base: Optional[T_AbsoluteAddress]
+    is_loaded: bool
+
+    @property
+    def size(self):
+        """
+        The distance from the program base to the end of the last section (0 without sections)
+        """
+        if len(self.sections) == 0:
+            return 0
+        # sections are kept sorted by base, so the last one ends the program
+        if self.base is None:
+            return self.sections[-1].base + self.sections[-1].size
+        return (self.sections[-1].base - self.base) + self.sections[-1].size
+
+    def __init__(self, name: str, base: Optional[int] = None):
+        """
+        :param name: the name of the program
+        :param base: the address the program expects to be loaded at, None if it is relocatable
+        """
+        self.name = name
+        self.context = InstructionContext()
+        self.sections = []
+        self.global_labels = set()
+        self.relative_labels = set()
+        self.base = base
+        self.is_loaded = False
+
+    def add_section(self, sec: MemorySection):
+        """
+        Add a section to this program, keeping the section list ordered by base address
+
+        :param sec: the section to add
+        """
+        # print a warning when a section is located before the programs base
+        if self.base is not None:
+            if sec.base < self.base:
+                print(
+                    FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format(
+                        sec, self.name, self.base
+                    ) + FMT_NONE)
+
+        self.sections.append(sec)
+        # keep section list ordered
+        self.sections.sort(key=lambda section: section.base)
+
+    def __repr__(self):
+        # one placeholder per argument, so that every value appears under its own label
+        return "{}(name={},globals={},sections={},base={})".format(
+            self.__class__.__name__, self.name, self.global_labels,
+            [s.name for s in self.sections], self.base
+        )
+
+    @property
+    def entrypoint(self):
+        """
+        The address where execution should start
+
+        This is the ``_start`` label, else the ``main`` label, else the base of the first
+        executable ``.text`` section. None if none of these exist.
+        """
+        if '_start' in self.context.labels:
+            return self.context.labels.get('_start')
+        if 'main' in self.context.labels:
+            return self.context.labels.get('main')
+        for sec in self.sections:
+            if get_section_base_name(sec.name) == '.text' and sec.flags.executable:
+                return sec.base
+        return None
+
+    def loaded_trigger(self, at_addr: T_AbsoluteAddress):
+        """
+        This trigger is called when the binary is loaded and its final address in memory is determined
+
+        This will do a small sanity check to prevent programs loading twice, or at addresses they don't
+        expect to be loaded.
+
+        Then it will finalize all relative symbols defined in it to point to the correct addresses.
+
+        :param at_addr: the address where the program will be located
+        :raises RuntimeError: if the program is already loaded at a different address
+        """
+        # NOTE(review): is_loaded is never set to True in this class, so this check only
+        # works if the caller sets the flag - confirm against the loading code
+        if self.is_loaded:
+            if at_addr != self.base:
+                raise RuntimeError("Program loaded twice at different addresses! This will probably break things!")
+            return
+
+        if self.base is not None and self.base != at_addr:
+            print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, '
+                            'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE)
+
+        # check if we are relocating
+        if self.base != at_addr:
+            # a relocatable program (base is None) stores offsets, so it moves by at_addr
+            offset = at_addr if self.base is None else at_addr - self.base
+
+            # move all sections by the offset
+            for sec in self.sections:
+                sec.base += offset
+
+            # move all relative symbols by the offset
+            for name in self.relative_labels:
+                self.context.labels[name] += offset
+
+        self.base = at_addr
+        self.context.base_address = at_addr
--- a/riscemu/types/program_loader.py
+++ b/riscemu/types/program_loader.py
@ -0,0 +1,58 @@
+import os
+from abc import abstractmethod, ABC
+from typing import Union, Iterator, List, Tuple
+
+from . import T_ParserOpts, Program
+
+
+class ProgramLoader(ABC):
+    """
+    A program loader is always specific to a given source file. It is a place to store all state
+    concerning the parsing and loading of that specific source file, including options.
+    """
+
+    def __init__(self, source_path: str, options: T_ParserOpts):
+        """
+        :param source_path: the path to the source file
+        :param options: the parser options for this file
+        """
+        self.source_path = source_path
+        self.options = options
+        # last path component, i.e. the file name without directories
+        self.filename = os.path.split(self.source_path)[-1]
+
+    @classmethod
+    @abstractmethod
+    def can_parse(cls, source_path: str) -> float:
+        """
+        Return confidence that the file located at source_path
+        should be parsed and loaded by this loader
+        :param source_path: the path of the source file
+        :return: the confidence that this file belongs to this parser
+        """
+        pass
+
+    @classmethod
+    @abstractmethod
+    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
+        """
+        parse command line args into an options dictionary
+
+        :param argv: the command line args list
+        :return: all remaining command line args and the parser options object
+        """
+        # typing.List / typing.Tuple are used in the signature because subscripting the
+        # builtin list is only possible on python 3.9 and newer
+        pass
+
+    @classmethod
+    def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader':
+        """
+        Instantiate a loader for the given source file with the required arguments
+
+        :param source_path: the path to the source file
+        :param options: the parsed options (guaranteed to come from this classes get_options method).
+        :return: An instance of a ProgramLoader for the specified source
+        """
+        return cls(source_path, options)
+
+    @abstractmethod
+    def parse(self) -> Union[Program, Iterator[Program]]:
+        """
+        Parse the source file of this loader
+
+        :return: a single program, or an iterator over programs
+        """
+        pass
--- a/riscemu/types/simple_instruction.py
+++ b/riscemu/types/simple_instruction.py
@ -0,0 +1,26 @@
+from typing import Union, Tuple
+
+from . import Instruction, T_RelativeAddress, InstructionContext
+from ..helpers import parse_numeric_argument
+
+
+class SimpleInstruction(Instruction):
+    """
+    An instruction whose arguments are kept as the strings they were written as.
+
+    Immediates are resolved on access: an argument is looked up as a label first and
+    parsed as a number if no such label is known.
+    """
+
+    def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]],
+                 context: InstructionContext, addr: T_RelativeAddress):
+        """
+        :param name: the name of the instruction
+        :param args: zero to three argument strings
+        :param context: the context used for resolving labels
+        :param addr: the address of this instruction, passed on when resolving labels
+        """
+        self.context = context
+        self.name = name
+        self.args = args
+        self.addr = addr
+
+    def get_imm(self, num: int) -> int:
+        """
+        Return argument number num as an immediate value
+
+        :param num: index of the argument
+        :return: the label address if the argument is a known label, the parsed number otherwise
+        """
+        label_target = self.context.resolve_label(self.args[num], self.addr)
+        if label_target is not None:
+            return label_target
+        return parse_numeric_argument(self.args[num])
+
+    def get_imm_reg(self, num: int) -> Tuple[int, str]:
+        """
+        Return the immediate at position num + 1 together with the register at position num
+
+        :param num: index of the register argument
+        :return: a tuple of (immediate, register)
+        """
+        immediate = self.get_imm(num + 1)
+        register = self.get_reg(num)
+        return immediate, register
+
+    def get_reg(self, num: int) -> str:
+        """
+        Return argument number num unchanged, as a register name
+        """
+        return self.args[num]
+
--- a/setup.py
+++ b/setup.py
@ -8,7 +8,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 setuptools.setup(
    name="riscemu",
    version=riscemu.__version__,
-    author="Anton Lydike",
+    author=riscemu.__author__,
    author_email="pip@antonlydike.de",
    description="RISC-V userspace and privileged emulator",
    long_description=long_description,
@ -23,7 +23,7 @@ setuptools.setup(
        "Operating System :: OS Independent",
    ],
    package_dir={"": "."},
-    packages=["riscemu", "riscemu.decoder", "riscemu.instructions", "riscemu.IO", "riscemu.priv"],
+    packages=["riscemu", "riscemu.decoder", "riscemu.instructions", "riscemu.IO", "riscemu.priv", "riscemu.types"],
    python_requires=">=3.6",
    install_requires=[
        "pyelftools~=0.27"
--- a/sphinx-docs/source/conf.py
+++ b/sphinx-docs/source/conf.py
@ -24,11 +24,11 @@ if os.getenv('READTHEDOCS', False) and not os.path.exists('riscemu.rst'):
 # -- Project information -----------------------------------------------------

 project = 'RiscEmu'
-copyright = '2021, Anton Lydike'
+copyright = '2022, Anton Lydike'
 author = 'Anton Lydike'

 # The full version, including alpha/beta/rc tags
-release = '0.1.0'
+release = '2.0.0a2'

 # -- General configuration ---------------------------------------------------

--- a/test/init.py
+++ b/test/init.py
@ -0,0 +1,3 @@
+from .test_tokenizer import *
+from .test_helpers import *
+from .test_integers import *
--- a/test/end_to_end/init.py
+++ b/test/end_to_end/init.py
--- a/test/end_to_end/end_to_end_test.py
+++ b/test/end_to_end/end_to_end_test.py
@ -0,0 +1,73 @@
+import contextlib
+import os
+from abc import abstractmethod
+from tempfile import NamedTemporaryFile
+from typing import Optional, Union, Tuple
+from unittest import TestCase
+
+from riscemu import CPU, UserModeCPU, InstructionSetDict, RunConfig
+from riscemu.types import Program
+
+
+class EndToEndTest(TestCase):
+    """
+    Base class for tests which load a program from source and run it on a CPU.
+
+    Subclasses provide the program through :meth:`get_source` and can customize the run
+    with :meth:`after_program_load` and :meth:`is_exception_expected`.
+    """
+
+    def __init__(self, cpu: Optional[CPU] = None, methodName: str = 'runTest'):
+        """
+        :param cpu: the cpu to run the program on, a UserModeCPU with all instruction sets by default
+        :param methodName: name of the test method, as passed by the unittest machinery
+        """
+        # unittest loaders create test cases as ``cls(methodName)``, so a string in the
+        # first position is the name of the test method and not a cpu
+        if isinstance(cpu, str):
+            cpu, methodName = None, cpu
+        super().__init__(methodName)
+        if cpu is None:
+            cpu = UserModeCPU(InstructionSetDict.values(), RunConfig())
+        self.cpu = cpu
+
+    @abstractmethod
+    def get_source(self) -> Tuple[str, Union[bytes, str, bytearray]]:
+        """
+        This method returns the source code of the program
+        :return: a tuple of (file name, file content)
+        """
+        pass
+
+    def test_run_program(self):
+        """
+        Runs the program and verifies output
+        :return:
+        """
+        with self.with_source_file() as names:
+            fname, orig_name = names
+            loader = self.cpu.get_best_loader_for(fname)
+            self.program = loader.instantiate(fname, loader.get_options([])).parse()
+            self._change_program_file_name(self.program, orig_name)
+            self.cpu.load_program(self.program)
+            self.after_program_load(self.program)
+            if isinstance(self.cpu, UserModeCPU):
+                self.cpu.setup_stack()
+            try:
+                self.cpu.launch(self.program)
+            except Exception as ex:
+                # only unexpected exceptions fail the test, expected ones are swallowed
+                if not self.is_exception_expected(ex):
+                    raise
+
+    @contextlib.contextmanager
+    def with_source_file(self):
+        """
+        Write the source to a temporary file, which is deleted when the context exits
+
+        :return: a context manager yielding (path of the temporary file, original name)
+        """
+        name, content = self.get_source()
+        mode = 'w' if isinstance(content, str) else 'wb'
+        f = NamedTemporaryFile(mode, suffix=name, delete=False)
+        try:
+            # writing happens inside the try block, so the file is removed even if it fails
+            with f:
+                f.write(content)
+            yield f.name, name
+        finally:
+            os.unlink(f.name)
+
+    def after_program_load(self, program):
+        """
+        Hook which is called after the program was loaded into the cpu, does nothing by default
+        """
+        pass
+
+    def is_exception_expected(self, ex: Exception) -> bool:
+        """
+        Decide if an exception raised while running the program is expected
+
+        :param ex: the exception raised by the cpu
+        :return: True if the exception should not fail the test (False by default)
+        """
+        return False
+
+    def _change_program_file_name(self, program: Program, new_name: str):
+        # the program was parsed from a temporary file, restore the original name everywhere
+        program.name = new_name
+        for sec in program.sections:
+            sec.owner = new_name
--- a/test/test_helpers.py
+++ b/test/test_helpers.py
@ -3,28 +3,17 @@ from unittest import TestCase
 from riscemu.helpers import *


-class Test(TestCase):
-    def test_int_to_bytes(self):
-        self.assertEqual(int_to_bytes(-1), bytearray([0xff] * 4), "-1")
-        self.assertEqual(int_to_bytes(1), bytearray([0, 0, 0, 1]), "1")
-        self.assertEqual(int_to_bytes(1231132), bytearray(b'\x00\x12\xc9\x1c'), "random number")
-        self.assertEqual(int_to_bytes(-1231132), bytearray(b'\xff\xed6\xe4'), "random negative number")
+class TestHelpers(TestCase):

-    def test_int_from_bytes(self):
-        self.assertEqual(bytearray([0xff] * 4), int_to_bytes(-1), "-1")
-        self.assertEqual(bytearray([0, 0, 0, 1]), int_to_bytes(1), "1")
-        self.assertEqual(bytearray(b'\x00\x12\xc9\x1c'), int_to_bytes(1231132), "random number")
-        self.assertEqual(bytearray(b'\xff\xed6\xe4'), int_to_bytes(-1231132), "random negative number")
-
-    def test_to_unsigned(self):
-        self.assertEqual(to_unsigned(-1), 0xFFFFFFFF)
-        self.assertEqual(to_unsigned(-100), 0xffffff9c)
-        self.assertEqual(to_unsigned(1), 1)
-        self.assertEqual(to_unsigned(0xffffffff), 0xffffffff)
-        self.assertEqual(to_unsigned(0xffed36e4), 0xffed36e4)
-
-    def test_to_signed(self):
-        self.assertEqual(to_signed(0xFFFFFFFF), -1)
-        self.assertEqual(to_signed(0xffed36e4), -1231132)
-        self.assertEqual(to_signed(0x0FFFFFFF), 0x0FFFFFFF)
+    # Checks that bind_twos_complement keeps values inside the signed 32 bit range
+    # unchanged and wraps values just outside of it around to the other end.
+    def test_bind_twos_complement(self):
+        minval = -(1 << 31)
+        maxval = ((1 << 31)-1)
 
+        self.assertEqual(bind_twos_complement(minval), minval, "minval preserves")
+        # NOTE(review): same check as the line above, only without a message
+        self.assertEqual(bind_twos_complement(minval), minval, )
+        self.assertEqual(bind_twos_complement(maxval), maxval, "maxval preserves")
+        self.assertEqual(bind_twos_complement(minval - 1), maxval, "minval-1 wraps")
+        self.assertEqual(bind_twos_complement(maxval + 1), minval, "maxval+1 wraps")
+        self.assertEqual(bind_twos_complement(0), 0, "0 is 0")
+        self.assertEqual(bind_twos_complement(1), 1, "1 is 1")
+        self.assertEqual(bind_twos_complement(-1), -1, "-1 is -1")
--- a/test/test_integers.py
+++ b/test/test_integers.py
@ -0,0 +1,19 @@
+from unittest import TestCase
+
+from riscemu.types import Int32, UInt32
+
+
+# NOTE(review): test_tokenizer.py defines a class with the same name and test/init.py
+# star-imports both modules, so the later import rebinds the name there.
+class TestTokenizer(TestCase):
+
+    # Logical right shifts of a positive and a negative Int32, compared to plain integers.
+    def test_logical_right_shift(self):
+        positive = Int32(100)
+        for shift, expected in ((0, positive), (10, 0), (1, 100 >> 1)):
+            self.assertEqual(positive.shift_right_logical(shift), expected)
+
+        # for negative values the sign bit is shifted like any other bit
+        negative = Int32(-100)
+        for shift, expected in ((0, negative), (1, 2147483598), (10, 4194303), (31, 1), (32, 0)):
+            self.assertEqual(negative.shift_right_logical(shift), expected)
--- a/test/test_isa.py
+++ b/test/test_isa.py
@ -0,0 +1,75 @@
+from riscemu.colors import FMT_ERROR, FMT_NONE, FMT_BOLD, FMT_GREEN
+from riscemu.instructions import InstructionSet
+from riscemu.types import Instruction, CPU
+from riscemu.decoder import RISCV_REGS
+
+FMT_SUCCESS = FMT_GREEN + FMT_BOLD
+
+
+def assert_equals(ins: Instruction, cpu: CPU):
+    """
+    Check that the first and the third argument of ins have the same value
+
+    :return: True if both values are equal
+    """
+    left = get_arg_from_ins(ins, 0, cpu)
+    right = get_arg_from_ins(ins, 2, cpu)
+    return left == right
+
+
+def assert_equals_mem(ins: Instruction, cpu: CPU):
+    """
+    Check that the integer stored at the address given by the first argument of ins
+    equals the value of the third argument
+
+    :return: True if both values are equal
+    """
+    address = get_arg_from_ins(ins, 0, cpu)
+    expected = get_arg_from_ins(ins, 2, cpu)
+    stored = cpu.mmu.read_int(address)
+    return stored == expected
+
+
+def assert_in(ins: Instruction, cpu: CPU):
+    """
+    Check that the value of the first argument of ins is among the values of
+    the arguments from the third one onwards
+
+    :return: True if the value was found
+    """
+    needle = get_arg_from_ins(ins, 0, cpu)
+    candidates = []
+    for position in range(2, len(ins.args)):
+        candidates.append(get_arg_from_ins(ins, position, cpu))
+    return needle in candidates
+
+
+def _not(func):
+    """
+    Create the negated version of a check function
+
+    :param func: a function taking (ins, cpu)
+    :return: a function with the same arguments, returning the inverted truth value
+    """
+    def test(ins: Instruction, cpu: CPU):
+        outcome = func(ins, cpu)
+        return not outcome
+
+    return test
+
+
+def get_arg_from_ins(ins: Instruction, num: int, cpu: CPU):
+    """
+    Get the value of argument number num of ins
+
+    :return: the content of the register if the argument names a register,
+             the immediate value of the argument otherwise
+    """
+    argument = ins.args[num]
+    if argument not in RISCV_REGS:
+        return ins.get_imm(num)
+    return cpu.regs.get(argument)
+
+
+# Maps the operator of an assert instruction (its second argument) to the function
+# which performs the check. Each function takes (ins, cpu) and returns a truth value.
+# NOTE(review): assert_equals_mem is defined above but has no entry here - confirm
+# whether that is intentional.
+assert_ops = {
+    '==': assert_equals,
+    '!=': _not(assert_equals),
+    'in': assert_in,
+    'not_in': _not(assert_in),
+}
+
+
+class TestIS(InstructionSet):
+    """
+    Instruction set for running testcases, it is not part of the RISC-V ISA.
+
+    ``failed`` becomes True as soon as an assertion fails or a fail instruction is
+    reached. In both cases the cpu is halted.
+    """
+
+    def __init__(self, cpu: 'CPU'):
+        print('[Test] loading testing ISA, this is only meant for running testcases and is not part of the RISC-V ISA!')
+        self.failed = False
+        super().__init__(cpu)
+
+    def instruction_assert(self, ins: Instruction):
+        """
+        Evaluate an assertion of the form ``assert <value>, <operator>, <value>[, <value>...]``
+
+        Malformed assertions and unknown operators are reported, but do not count as failures.
+        """
+        if len(ins.args) < 3:
+            print(FMT_ERROR + '[Test] Unknown assert statement: {}'.format(ins) + FMT_NONE)
+            return
+        op = ins.args[1]
+        if op not in assert_ops:
+            print(FMT_ERROR + '[Test] Unknown operation statement: {} in {}'.format(op, ins) + FMT_NONE)
+            return
+
+        # every message ends with FMT_NONE, otherwise the color leaks into later output
+        if assert_ops[op](ins, self.cpu):
+            print(FMT_SUCCESS + '[TestCase] 🟢 passed assertion {}'.format(ins) + FMT_NONE)
+        else:
+            print(FMT_ERROR + '[TestCase] 🔴 failed assertion {}'.format(ins) + FMT_NONE)
+            self.cpu.halted = True
+            self.failed = True
+
+    def instruction_fail(self, ins: Instruction):
+        """
+        Fail the testcase unconditionally and halt the cpu
+        """
+        print(FMT_ERROR + '[TestCase] 🔴 reached fail instruction! {}'.format(ins) + FMT_NONE)
+        self.cpu.halted = True
+        self.failed = True
+
+    def assert_mem(self, ins: Instruction):
+        """
+        Placeholder, the original definition had no body (which is a syntax error).
+        """
+        # NOTE(review): the intended behavior is unknown - presumably related to
+        # assert_equals_mem, confirm before implementing
+        raise NotImplementedError('assert_mem is not implemented')
--- a/test/test_tokenizer.py
+++ b/test/test_tokenizer.py
@ -0,0 +1,126 @@
+from unittest import TestCase
+
+from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA, \
+    split_whitespace_respecting_quotes
+
+
+def ins(name: str) -> Token:
+    """Shorthand for creating an INSTRUCTION_NAME token with the given value"""
+    return Token(TokenType.INSTRUCTION_NAME, name)
+
+
+def arg(name: str) -> Token:
+    """Shorthand for creating an ARGUMENT token with the given value"""
+    return Token(TokenType.ARGUMENT, name)
+
+
+def op(name: str) -> Token:
+    """Shorthand for creating a PSEUDO_OP token with the given value"""
+    return Token(TokenType.PSEUDO_OP, name)
+
+
+def lbl(name: str) -> Token:
+    """Shorthand for creating a LABEL token with the given value"""
+    return Token(TokenType.LABEL, name)
+
+
+# Tests for tokenize and split_whitespace_respecting_quotes. Every test compares the
+# produced tokens (or parts) with a hand written list of expected values.
+class TestTokenizer(TestCase):
+
+    # plain instructions: a name, comma separated arguments and a NEWLINE per line
+    def test_instructions(self):
+        program = [
+            'li     a0, 144',
+            'divi   a0, a0, 12',
+            'xori   a1, a0, 12'
+        ]
+        tokens = [
+            ins('li'), arg('a0'), COMMA, arg('144'), NEWLINE,
+            ins('divi'), arg('a0'), COMMA, arg('a0'), COMMA, arg('12'), NEWLINE,
+            ins('xori'), arg('a1'), COMMA, arg('a0'), COMMA, arg('12'), NEWLINE,
+        ]
+        self.assertEqual(list(tokenize(program)), tokens)
+
+    # a comment line produces no tokens, for each of the three comment markers
+    def test_comments(self):
+        parsed_res = [
+            ins('li'), arg('a0'), COMMA, arg('144'), NEWLINE
+        ]
+        for c in ('#', '//', ';'):
+            lines = [
+                c + ' this is a comment',
+                'li a0, 144'
+            ]
+            self.assertEqual(list(tokenize(lines)), parsed_res)
+
+    # pseudo ops (starting with a dot) and their arguments
+    def test_pseudo_ins(self):
+        parsed_res = [
+            Token(TokenType.PSEUDO_OP, '.section'), Token(TokenType.ARGUMENT, '.text'), NEWLINE,
+            Token(TokenType.PSEUDO_OP, '.type'), Token(TokenType.ARGUMENT, 'init'), COMMA,
+            Token(TokenType.ARGUMENT, '@function'), NEWLINE
+        ]
+        input_program = [
+            '.section .text',
+            '.type init, @function'
+        ]
+        self.assertEqual(list(tokenize(input_program)), parsed_res)
+
+    # a small program mixing comments, empty lines, a pseudo op, a label and instructions
+    def test_full_program(self):
+        program = """
+# a hashtag comment
+
+; semicolon comment followed by an empty line
+.section .text
+// double slash comment
+    addi sp, sp, -32
+    sw   s0, 0(ra)
+section:
+    sub  s0, s0, s0
+"""
+        # empty lines and comments produce no tokens. '0(ra)' is expected as the two
+        # arguments 'ra' and '0', in that order.
+        tokens = [
+            op('.section'), arg('.text'), NEWLINE,
+            ins('addi'), arg('sp'), COMMA, arg('sp'), COMMA, arg('-32'), NEWLINE,
+            ins('sw'), arg('s0'), COMMA, arg('ra'), arg('0'), NEWLINE,
+            lbl('section:'), NEWLINE,
+            ins('sub'), arg('s0'), COMMA, arg('s0'), COMMA, arg('s0'), NEWLINE
+        ]
+
+        self.assertEqual(list(tokenize(program.splitlines())), tokens)
+
+    # a single word is returned as is
+    def test_split_whitespace_respecting_quotes_single(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes("test")), ["test"]
+        )
+
+    # the empty string yields no parts
+    def test_split_whitespace_respecting_quotes_empty(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes("")), []
+        )
+
+    # unquoted text is split at whitespace
+    def test_split_whitespace_respecting_quotes_two_parts(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes("test 123")), ["test", "123"]
+        )
+
+    # single quoted text stays together, the quotes are removed
+    def test_split_whitespace_respecting_quotes_whole_quoted(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes("'test 123'")), ["test 123"]
+        )
+
+    # double quotes behave like single quotes
+    def test_split_whitespace_respecting_quotes_double_quotes(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes('"test 123"')), ["test 123"]
+        )
+
+    # a quoted part followed by an unquoted part
+    def test_split_whitespace_respecting_quotes_quoted_then_normal(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes('"test 123" abc')), ["test 123", "abc"]
+        )
+
+    # a quoted part between two unquoted parts
+    def test_split_whitespace_respecting_quotes_quoted_sorrounded(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes('hello "test 123" abc')), ["hello", "test 123", "abc"]
+        )
+
+    # repeated spaces and tabs separate parts as well
+    def test_split_whitespace_respecting_quotes_weird_spaces(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes('hello  "test 123"\tabc')), ["hello", "test 123", "abc"]
+        )
+
+    # quotes split parts even without any surrounding whitespace
+    def test_split_whitespace_respecting_quotes_quotes_no_spaces(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes('hello"test 123"abc')), ["hello", "test 123", "abc"]
+        )
--- a/test/testcases/main.py
+++ b/test/testcases/main.py
@ -0,0 +1,53 @@
+from riscemu import AssemblyFileLoader
+from riscemu.colors import *
+
+FMT_SUCCESS = FMT_GREEN + FMT_BOLD  # prefix for the "successful" line printed by run_test
+
+def run_test(path: str):
+    """Run one .asm testcase; True only if the CPU halted and TestIS recorded no failure."""
+    from riscemu import CPU, UserModeCPU, RunConfig
+    from riscemu.instructions import InstructionSetDict
+    from test.test_isa import TestIS
+    import os
+
+    file_name = os.path.basename(path)
+    cpu = UserModeCPU([*InstructionSetDict.values(), TestIS], RunConfig())
+    try:
+        program = AssemblyFileLoader(path, {}).parse()
+        cpu.load_program(program)
+        cpu.launch(program)
+    except Exception as ex:
+        print(FMT_ERROR + '[Test] 🔴 failed with exception "{}" ({})'.format(ex, file_name) + FMT_NONE)
+        raise ex  # already reported above; propagate so the caller still sees the error
+
+    if not cpu.halted:
+        return False
+    for isa in cpu.instruction_sets:
+        if not isinstance(isa, TestIS):
+            continue
+        passed = not isa.failed
+        if passed:
+            print(FMT_SUCCESS + '[Test] 🟢 successful {}'.format(file_name) + FMT_NONE)
+        return passed
+    return False
+
+
+if __name__ == '__main__':
+    # Run every *.asm testcase next to this file, print the totals, exit 1 on any failure.
+    import os
+    import glob
+
+    successes = 0
+    failures = 0
+    # abspath(): on Python < 3.9 a relative __file__ makes dirname() return '',
+    # turning the pattern into '/*.asm'. sorted(): glob order is otherwise arbitrary.
+    testcase_dir = os.path.dirname(os.path.abspath(__file__))
+    for path in sorted(glob.glob(os.path.join(glob.escape(testcase_dir), '*.asm'))):
+        print(FMT_BLUE + '[Test] running testcase ' + os.path.basename(path) + FMT_NONE)
+        if run_test(path):
+            successes += 1
+        else:
+            failures += 1
+    ttl = successes + failures
+    print((FMT_ERROR if failures else FMT_SUCCESS) + '[Test] {} of {} testcases passed'.format(successes, ttl) + FMT_NONE)
+    raise SystemExit(1 if failures else 0)
--- a/test/testcases/half-loads.asm
+++ b/test/testcases/half-loads.asm
@ -0,0 +1,7 @@
+.data
+
+data:
+.word   0xFFFFFFFF, 0x0000FFFF, 0xFF00FF00, 0x7FFFFFFF
+
+.text
+    ebreak    ; the only instruction: this file defines the words at "data" but performs no loads itself
--- a/test/testcases/symbols.asm
+++ b/test/testcases/symbols.asm
@ -0,0 +1,20 @@
+.text
+
+main:
+        addi    a0, zero, main          ; a0 = value of the symbol main
+        addi    a1, zero, main          ; a1 = same value, incremented by the loop below
+        addi    t0, zero, 1000          ; t0 = loop bound
+        assert  a0, ==, 0x100           ; this testcase expects main to resolve to 0x100
+1:
+        addi    a1, a1, 1
+        blt     a1, t0, 1b              ; back to the preceding "1:" while a1 < t0
+        sub     a1, a1, a0              ; a1 = 1000 - 0x100 = 744 (checked below)
+        j       1f                      ; forward to the following "1:", skipping the two lines below
+        addi    a1, zero, 0
+        fail
+1:
+        assert  a1, ==, 744
+        add     a0, zero, a1            ; set exit code to a1
+        addi    a7, zero, SCALL_EXIT    ; exit syscall code
+        scall
+        fail                            ; presumably only reached if the exit syscall returns - TODO confirm