started with base type overhaul

2022-02-11 13:32:02 +01:00 · 2022-02-11 13:32:02 +01:00 · 5538034f8b
commit 5538034f8b
parent 0488a9d6bc
22 changed files with 921 additions and 579 deletions
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2021 Anton Lydike
+Copyright (c) 2021-2022 Anton Lydike

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/riscemu/CPU.py
+++ b/riscemu/CPU.py
@ -9,7 +9,7 @@ on them.
 import sys
 from typing import Tuple, List, Dict, Callable, Type

-from .base_types import MemoryFlags
+from .types import MemoryFlags
 from .syscall import SyscallInterface, get_syscall_symbols
 from .exceptions import RiscemuBaseException, LaunchDebuggerException
 from .MMU import MMU
@ -23,7 +23,7 @@ import riscemu
 import typing

 if typing.TYPE_CHECKING:
-    from . import base_types, LoadedExecutable, LoadedInstruction
+    from . import types, LoadedExecutable, LoadedInstruction
    from .instructions.InstructionSet import InstructionSet


@ -34,7 +34,7 @@ class CPU:
    It is initialized with a configuration and a list of instruction sets.
    """

-    INS_XLEN = 1
+    INS_XLEN = 4

    def __init__(self, conf: RunConfig, instruction_sets: List[Type['riscemu.InstructionSet']]):
        """
@ -70,34 +70,6 @@ class CPU:
        if conf.include_scall_symbols:
            self.mmu.global_symbols.update(get_syscall_symbols())

-    def get_tokenizer(self, tokenizer_input):
-        """
-        Returns a tokenizer that respects the language of the CPU
-
-        :param tokenizer_input: an instance of the RiscVTokenizerInput class
-        """
-        return RiscVTokenizer(tokenizer_input, self.all_instructions())
-
-    def load(self, e: riscemu.base_types):
-        """
-        Load an executable into Memory
-        """
-        return self.mmu.load_bin(e)
-
-    def run_loaded(self, le: 'riscemu.LoadedExecutable'):
-        """
-        Run a loaded executable
-        """
-        self.pc = le.run_ptr
-
-        if self.conf.stack_size > 0:
-            self.stack = self.mmu.allocate_section("stack", self.conf.stack_size, MemoryFlags(False, False))
-            self.regs.set('sp', self.stack.base + self.stack.size)
-            print(FMT_CPU + '[CPU] Allocated {} bytes of stack'.format(self.stack.size) + FMT_NONE)
-
-        print(FMT_CPU + '[CPU] Started running from 0x{:08X} ({})'.format(le.run_ptr, le.name) + FMT_NONE)
-        self._run()
-
    def continue_from_debugger(self, verbose=True):
        """
        called from the debugger to continue running
@ -157,24 +129,6 @@ class CPU:
            print()
            print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE)

-    def run_instruction(self, ins: 'LoadedInstruction'):
-        """
-        Execute a single instruction
-
-        :param ins: The instruction to execute
-        """
-        if ins.name in self.instructions:
-            self.instructions[ins.name](ins)
-        else:
-            # this should never be reached, as unknown instructions are imparseable
-            raise RuntimeError("Unknown instruction: {}".format(ins))
-
-    def all_instructions(self) -> List[str]:
-        """
-        Return a list of all instructions this CPU can execute.
-        """
-        return list(self.instructions.keys())
-
    def __repr__(self):
        """
        Returns a representation of the CPU and some of its state.
--- a/riscemu/MMU.py
+++ b/riscemu/MMU.py
@ -4,17 +4,20 @@ RiscEmu (c) 2021 Anton Lydike
 SPDX-License-Identifier: MIT
 """

-from .base_types import InstructionContext, Instruction, MemorySection, MemoryFlags, T_RelativeAddress, T_AbsoluteAddress, \
-    Program
-from .helpers import align_addr, int_from_bytes
-from .exceptions import OutOfMemoryException, InvalidAllocationException
+from typing import Dict, List, Optional
+
 from .colors import *
-from typing import Dict, List, Tuple, Optional
+from .exceptions import InvalidAllocationException
+from .helpers import align_addr, int_from_bytes
+from .types import Instruction, MemorySection, MemoryFlags, T_AbsoluteAddress, \
+    Program


 class MMU:
    """
-    The MemoryManagementUnit (handles loading binaries, and reading/writing data)
+    The MemoryManagementUnit. This provides a unified interface for reading/writing data from/to memory.
+
+    It also provides various translations for addresses.
    """

    max_size = 0xFFFFFFFF
@ -62,9 +65,9 @@ class MMU:
        return None

    def get_bin_containing(self, addr: T_AbsoluteAddress) -> Optional[Program]:
-        for exe in self.binaries:
-            if exe.base_addr <= addr < exe.base_addr + exe.size:
-                return exe
+        for program in self.programs:
+            if program.base <= addr < program.base + program.size:
+                return program
        return None

    def read_ins(self, addr: T_AbsoluteAddress) -> Instruction:
@ -140,7 +143,68 @@ class MMU:
    def read_int(self, addr: int) -> int:
        return int_from_bytes(self.read(addr, 4))

+    def translate_address(self, address: T_AbsoluteAddress) -> str:
+        # FIXME: proper implementation using the debug info
+        return str(address)
+
+    def has_continous_free_region(self, start: int, end: int) -> bool:
+        # region is [start, end), matching the half-open section ranges; no sections means all is free
+        if len(self.sections) == 0:
+            return True
+        # if the last section ends at or before the start we are also good
+        if start >= self.sections[-1].base + self.sections[-1].size:
+            return True
+
+        for sec in self.sections:
+            # skip all sections that end at or before the required start point
+            if sec.base + sec.size <= start:
+                continue
+            # we now have the first section that reaches past the start point
+            # if this section starts at or after the (exclusive) end, we are good
+            if sec.base >= end:
+                return True
+            # otherwise the requested region overlaps this section
+            return False
+        # if all sections end before the requested start we are good
+        # technically we shouldn't ever reach this point, but better safe than sorry
+        return True
+
+    def load_program(self, program: Program, align_to: int = 4):
+        if program.base is not None:
+            if not self.has_continous_free_region(program.base, program.base + program.size):
+                print(FMT_MEM + "Cannot load program {} into desired space (0x{:0x}-0x{:0x}), area occupied.".format(
+                    program.name, program.base, program.base + program.size
+                ) + FMT_NONE)
+                raise InvalidAllocationException("Area occupied".format(
+                    program.name, program.base, program.base + program.size
+                ), program.name, program.size, MemoryFlags(False, True))
+
+            at_addr = program.base
+        else:
+            first_guaranteed_free_address = self.sections[-1].base + self.sections[-1].size
+            at_addr = align_addr(first_guaranteed_free_address, align_to)
+
+        # trigger the load event to set all addresses in the binary
+        program.loaded_trigger(at_addr)
+
+        # add program and sections to internal state
+        self.programs.append(program)
+        self.sections += program.sections
+        self._update_state()
+
+        # load all global symbols from program
+        self.global_symbols.update(
+            {key: program.context.labels[key] for key in program.global_labels}
+        )
+        # inject reference to global symbol table into program context
+        # FIXME: this is pretty unclean and should probably be solved in a better way in the future
+        program.context.global_symbol_dict = self.global_symbols
+
+    def _update_state(self):
+        self.programs.sort(key=lambda bin: bin.base)
+        self.sections.sort(key=lambda sec: sec.base)
+
    def __repr__(self):
        return "MMU(\n\t{}\n)".format(
-            "\n\t".join(repr(x) for x in self.sections)
+            "\n\t".join(repr(x) for x in self.programs)
        )
--- a/riscemu/init.py
+++ b/riscemu/init.py
@ -22,7 +22,7 @@ from .CPU import CPU

 from .config import RunConfig

-from .parser import tokenize, parse_tokens, parse_program_from_file
+from .parser import tokenize, parse_tokens, AssemblyFileLoader

 __author__ = "Anton Lydike <Anton@Lydike.com>"
 __copyright__ = "Copyright 2021 Anton Lydike"
--- a/riscemu/assembler.py
+++ b/riscemu/assembler.py
@ -2,14 +2,20 @@ from typing import Optional, Tuple, Union, List
 from enum import Enum, auto
 from typing import Optional, Tuple, Union

-from .helpers import parse_numeric_argument, align_addr, int_to_bytes
-from .base_types import Program, T_RelativeAddress, InstructionContext, Instruction
+from .helpers import parse_numeric_argument, align_addr, int_to_bytes, get_section_base_name
+from .types import Program, T_RelativeAddress, InstructionContext, Instruction
 from .colors import FMT_PARSE, FMT_NONE
 from .exceptions import ParseException, ASSERT_LEN, ASSERT_NOT_NULL
 from .tokenizer import Token
-from .types import BinaryDataMemorySection, InstructionMemorySection
+from .base import BinaryDataMemorySection, InstructionMemorySection

 INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini')
+"""
+A tuple containing all section names which contain executable code (instead of data)
+
+The first segment of each section name (the first segment of ".text.main" is ".text") is checked
+against this list to determine the section's type.
+"""


 class MemorySectionType(Enum):
@ -64,17 +70,21 @@ class ParseContext:
        if self.section is None:
            return
        if self.section.type == MemorySectionType.Data:
-            section = BinaryDataMemorySection(self.section.data, self.section.name, self.context, self.program)
+            section = BinaryDataMemorySection(
+                self.section.data, self.section.name, self.context, self.program.name, self.section.base
+            )
            self.program.add_section(section)
        elif self.section.type == MemorySectionType.Instructions:
-            section = InstructionMemorySection(self.section.data, self.section.name, self.context, self.program)
+            section = InstructionMemorySection(
+                self.section.data, self.section.name, self.context, self.program.name, self.section.base
+            )
            self.program.add_section(section)
        self.section = None

-    def new_section(self, name: str, type: MemorySectionType):
+    def new_section(self, name: str, type: MemorySectionType, alignment: int = 4):
        base = 0
        if self.section is not None:
-            base = align_addr(self.section.current_address(), 4)
+            base = align_addr(self.section.current_address(), alignment)
            print("base at {}".format(base))
        self._finalize_section()
        self.section = CurrentSection(name, type, base)
@ -94,10 +104,6 @@ def ASSERT_IN_SECTION_TYPE(context: ParseContext, type: MemorySectionType):
        )


-def get_section_base_name(section_name: str) -> str:
-    return '.' + section_name.split('.')[1]
-
-
 class AssemblerDirectives:
    """
    This class represents a collection of all assembler directives as documented by
--- a/riscemu/base.py
+++ b/riscemu/base.py
@ -0,0 +1,81 @@
+"""
+This file contains a base implementation of Instruction, and MemorySection.
+
+This aims to be a simple base, usable for everyone who needs the basic functionality, but doesn't
+want to set up their own subtypes of Instruction and MemorySection
+"""
+
+from typing import List, Tuple
+from .exceptions import MemoryAccessException
+from .helpers import parse_numeric_argument
+from .types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \
+    T_AbsoluteAddress, Program
+
+
+class SimpleInstruction(Instruction):
+    def __init__(self, name: str, args: Tuple[str], context: InstructionContext, addr: T_RelativeAddress):
+        self.context = context
+        self.name = name
+        self.args = args
+        self.addr = addr
+
+    def get_imm(self, num: int) -> int:
+        resolved_label = self.context.resolve_label(self.args[num], self.addr)
+        if resolved_label is None:
+            return parse_numeric_argument(self.args[num])
+        return resolved_label
+
+    def get_imm_reg(self, num: int) -> Tuple[int, str]:
+        return self.get_imm(num + 1), self.get_reg(num)
+
+    def get_reg(self, num: int) -> str:
+        return self.args[num]
+
+
+class InstructionMemorySection(MemorySection):
+    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0):
+        self.name = name
+        self.base = base
+        self.context = context
+        self.size = len(instructions) * 4
+        self.flags = MemoryFlags(True, True)
+        self.instructions = instructions
+        self.owner = owner
+
+    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
+        raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')
+
+    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
+        raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write')
+
+    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
+        if offset % 4 != 0:
+            raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')
+        return self.instructions[offset // 4]
+
+
+class BinaryDataMemorySection(MemorySection):
+    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0, flags: MemoryFlags = None):
+        self.name = name
+        self.base = base
+        self.context = context
+        self.size = len(data)
+        self.flags = flags if flags is not None else MemoryFlags(False, False)
+        self.data = data
+        self.owner = owner
+
+    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
+        if offset + size > self.size:
+            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read')
+        return self.data[offset:offset + size]
+
+    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
+        if offset + size > self.size:
+            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write')
+        if len(data[0:size]) != size:
+            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
+        self.data[offset:offset + size] = data[0:size]
+
+    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
+        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
+                                    offset, 4, 'instruction fetch')
--- a/riscemu/base_types.py
+++ b/riscemu/base_types.py
@ -1,188 +0,0 @@
-"""
-RiscEmu (c) 2021 Anton Lydike
-
-SPDX-License-Identifier: MIT
-
-This file contains base classes which represent loaded programs
-"""
-
-import re
-from abc import ABC, abstractmethod
-from collections import defaultdict
-from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple, Set
-
-from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE
-from .exceptions import ParseException
-from .helpers import format_bytes
-
-T_RelativeAddress = int
-T_AbsoluteAddress = int
-
-NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
-
-
-@dataclass(frozen=True)
-class MemoryFlags:
-    read_only: bool
-    executable: bool
-
-    def __repr__(self):
-        return "{}({},{})".format(
-            self.__class__.__name__,
-            'ro' if self.read_only else 'rw',
-            'x' if self.executable else '-'
-        )
-
-
-class InstructionContext:
-    base_address: T_AbsoluteAddress
-    """
-    The address where the instruction block is placed
-    """
-
-    labels: Dict[str, T_RelativeAddress]
-    """
-    This dictionary maps all labels to their relative position of the instruction block
-    """
-    numbered_labels: Dict[str, List[T_RelativeAddress]]
-    """
-    This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where 
-    the label was placed 
-    """
-
-    def __init__(self):
-        self.labels = dict()
-        self.numbered_labels = defaultdict(list)
-        self.base_address = 0
-
-    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_RelativeAddress]:
-        if NUMBER_SYMBOL_PATTERN.match(symbol):
-            if address_at is None:
-                raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))
-
-            direction = symbol[-1]
-            if direction == 'b':
-                return max([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr < address_at],
-                           default=None)
-            else:
-                return min([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr > address_at],
-                           default=None)
-        else:
-            return self.labels.get(symbol, None)
-
-
-class Instruction(ABC):
-    name: str
-    args: tuple
-
-    @abstractmethod
-    def get_imm(self, num: int) -> int:
-        """
-        parse and get immediate argument
-        """
-        pass
-
-    @abstractmethod
-    def get_imm_reg(self, num: int) -> Tuple[int, str]:
-        """
-        parse and get an argument imm(reg)
-        """
-        pass
-
-    @abstractmethod
-    def get_reg(self, num: int) -> str:
-        """
-        parse and get an register argument
-        """
-        pass
-
-    def __repr__(self):
-        return "{} {}".format(self.name, ", ".join(self.args))
-
-
-@dataclass
-class MemorySection(ABC):
-    name: str
-    flags: MemoryFlags
-    size: int
-    base: T_AbsoluteAddress
-    owner: str
-    context: InstructionContext
-
-    @abstractmethod
-    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
-        pass
-
-    @abstractmethod
-    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
-        pass
-
-    @abstractmethod
-    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
-        pass
-
-    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress], fmt: str = 'hex',
-             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
-        if self.flags.executable:
-            bytes_per_row = 4
-        highlight = None
-        if end is None:
-            end = start + (bytes_per_row * (rows // 2))
-            highlight = start
-            start = start - (bytes_per_row * (rows // 2))
-        if self.flags.executable:
-            print(FMT_MEM + "{}, viewing {} instructions:".format(
-                self, (end - start) // 4
-            ) + FMT_NONE)
-
-            for addr in range(start, end, 4):
-                if addr == highlight:
-                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
-                print("0x{:x}: {}{}".format(
-                    self.base + addr, self.read_ins(addr), FMT_NONE
-                ))
-        else:
-            print(FMT_MEM + "{}, viewing {} bytes:".format(
-                self, (end - start)
-            ) + FMT_NONE)
-
-            for addr in range(start, end, bytes_per_row):
-                hi_ind = (highlight - addr) // group
-                print("0x{:x}: {}{}".format(
-                    self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE
-                ))
-
-    def __repr__(self):
-        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
-            self.__class__.__name__,
-            self.name,
-            self.base,
-            self.size,
-            self.flags,
-            self.owner
-        )
-
-
-class Program:
-    name: str
-    context: InstructionContext
-    global_labels: Set[str]
-    sections: List[MemorySection]
-    base: T_AbsoluteAddress = 0
-
-    def __init__(self, name: str, base: int = 0):
-        self.name = name
-        self.context = InstructionContext()
-        self.sections = []
-        self.base = base
-        self.global_labels = set()
-
-    def add_section(self, sec: MemorySection):
-        self.sections.append(sec)
-
-    def __repr__(self):
-        return "{}(name={},context={},globals={},sections={},base={})".format(
-            self.__class__.__name__, self.name, self.context, self.global_labels,
-            [s.name for s in self.sections], self.base
-        )
--- a/riscemu/debug.py
+++ b/riscemu/debug.py
@ -7,7 +7,7 @@ SPDX-License-Identifier: MIT
 import typing
 from .registers import Registers
 from .colors import FMT_DEBUG, FMT_NONE
-from .base_types import Instruction
+from .types import Instruction
 from .helpers import *

 if typing.TYPE_CHECKING:
--- a/riscemu/exceptions.py
+++ b/riscemu/exceptions.py
@ -5,8 +5,11 @@ SPDX-License-Identifier: MIT
 """

 from abc import abstractmethod
-from .base_types import Instruction
 from .colors import *
+import typing
+
+if typing.TYPE_CHECKING:
+    from .types import Instruction


 class RiscemuBaseException(BaseException):
@ -112,7 +115,7 @@ class InvalidAllocationException(RiscemuBaseException):


 class UnimplementedInstruction(RiscemuBaseException):
-    def __init__(self, ins: Instruction):
+    def __init__(self, ins: 'Instruction'):
        self.ins = ins

    def message(self):
--- a/riscemu/helpers.py
+++ b/riscemu/helpers.py
@ -139,3 +139,9 @@ class Peekable(Generic[T], Iterator[T]):

    def is_empty(self) -> bool:
        return self.peek() is None
+
+
+def get_section_base_name(section_name: str) -> str:
+    if '.' not in section_name:
+        print(FMT_PARSE + f"Invalid section {section_name}, not starting with a dot!" + FMT_NONE)
+    return '.' + section_name.split('.')[1]
--- a/riscemu/instructions/InstructionSet.py
+++ b/riscemu/instructions/InstructionSet.py
@ -10,7 +10,7 @@ from abc import ABC
 from ..CPU import CPU
 from ..helpers import to_unsigned
 from ..exceptions import ASSERT_LEN, ASSERT_IN
-from ..base_types import Instruction
+from ..types import Instruction


 class InstructionSet(ABC):
--- a/riscemu/instructions/RV32I.py
+++ b/riscemu/instructions/RV32I.py
@ -11,7 +11,7 @@ from ..colors import FMT_DEBUG, FMT_NONE
 from ..debug import launch_debug_session
 from ..exceptions import LaunchDebuggerException
 from ..syscall import Syscall
-from ..base_types import Instruction
+from ..types import Instruction


 class RV32I(InstructionSet):
--- a/riscemu/parser.py
+++ b/riscemu/parser.py
@ -5,15 +5,15 @@ SPDX-License-Identifier: MIT
 """
 import os
 import re
-from typing import Dict, Tuple, Iterable, Callable
+from typing import Dict, Tuple, Iterable, Callable, List

 from .helpers import Peekable
 from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
-from .base_types import Program
+from .types import Program, T_ParserOpts, ProgramLoader
 from .colors import FMT_PARSE
 from .exceptions import ParseException
 from .tokenizer import Token, TokenType, tokenize
-from .types import SimpleInstruction
+from .base import SimpleInstruction


 def parse_instruction(token: Token, args: Tuple[str], context: ParseContext):
@ -53,7 +53,6 @@ def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
    for token, args in composite_tokenizer(Peekable[Token](tokens_iter)):
        if token.type not in PARSERS:
            raise ParseException("Unexpected token type: {}, {}".format(token, args))
-        print("{} {}".format(token, args))
        PARSERS[token.type](token, args, context)

    return context.finalize()
@ -92,9 +91,37 @@ def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
            next(tokens)
            break
        break
-        #raise ParseException("Expected newline, instead got {}".format(tokens.peek()))
+        # raise ParseException("Expected newline, instead got {}".format(tokens.peek()))


-def parse_program_from_file(path: str) -> Program:
-    with open(path, 'r') as f:
-        return parse_tokens(os.path.split(path)[-1], tokenize(f))
+class AssemblyFileLoader(ProgramLoader):
+    """
+    This class loads assembly files written by hand. It understands some assembler directives and supports most
+    pseudo instructions. It does very little verification of source correctness.
+
+    It also supports numbered jump targets and properly supports local and global scope (.globl assembly directive)
+
+
+    The AssemblyFileLoader loads .asm, .S and .s files by default, and acts as a weak fallback to all other filetypes.
+    """
+    def parse(self) -> Program:
+        with open(self.source_path, 'r') as f:
+            return parse_tokens(self.filename, tokenize(f))
+
+    @classmethod
+    def can_parse(cls, source_path: str) -> float:
+        """
+        Rate how confident this loader is that it can parse the given file.
+        It also acts as a weak fallback if no other loaders want to take the file.
+
+        :param source_path: the path to the source file
+        :return: 1 for .asm, .S and .s files, 0.01 (weak fallback) for everything else
+        """
+        # gcc recognizes these file extensions as assembly. So we will do too.
+        if source_path.split('.')[-1] in ('asm', 'S', 's'):
+            return 1
+        return 0.01
+
+    @classmethod
+    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
+        return argv, {}  # this loader consumes no arguments and defines no options
--- a/riscemu/priv/ElfLoader.py
+++ b/riscemu/priv/ElfLoader.py
@ -1,11 +1,9 @@
-from dataclasses import dataclass
-from typing import List, Dict, Tuple
+from typing import List

 from .Exceptions import *
-from ..exceptions import RiscemuBaseException
-from ..base_types import MemoryFlags, LoadedMemorySection
-from ..decoder import decode, RISCV_REGS, format_ins
+from .types import ElfMemorySection
 from ..helpers import FMT_PARSE, FMT_NONE, FMT_GREEN, FMT_BOLD
+from ..types import MemoryFlags, Program, ProgramLoader, T_ParserOpts

 FMT_ELF = FMT_GREEN + FMT_BOLD

@ -13,41 +11,53 @@ if typing.TYPE_CHECKING:
    from elftools.elf.elffile import ELFFile
    from elftools.elf.sections import Section, SymbolTableSection

-# This requires pyelftools package!
-
 INCLUDE_SEC = ('.text', '.stack', '.bss', '.sdata', '.sbss')


-class ElfExecutable:
-    sections: List['ElfLoadedMemorySection']
-    sections_by_name: Dict[str, 'ElfLoadedMemorySection']
-    symbols: Dict[str, int]
-    run_ptr: int
+class ElfBinaryFileLoader(ProgramLoader):
+    """
+    Loads compiled elf binaries (checks for the magic sequence 7f45 4c46)

-    def __init__(self, name: str):
-        self.sections = list()
-        self.sections_by_name = dict()
-        self.symbols = dict()
+    This loader respects local and global symbols.
+    """
+    program: Program

+    def __init__(self, source_path: str, options: T_ParserOpts):
+        super().__init__(source_path, options)
+        self.program = Program(self.filename)
+
+    @classmethod
+    def can_parse(cls, source_path: str) -> float:
+        with open(source_path, 'rb') as f:
+            if f.read(4) == b'\x7f\x45\x4c\x46':
+                return 1
+        return 0
+
+    @classmethod
+    def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]:
+        return argv, {}
+
+    def parse(self) -> Program:
        try:
            from elftools.elf.elffile import ELFFile
            from elftools.elf.sections import Section, SymbolTableSection

-            with open(name, 'rb') as f:
-                print(FMT_ELF + "[ElfLoader] Loading elf executable from: {}".format(name) + FMT_NONE)
+            with open(self.source_path, 'rb') as f:
+                print(FMT_ELF + "[ElfLoader] Loading elf executable from: {}".format(self.source_path) + FMT_NONE)
                self._read_elf(ELFFile(f))
        except ImportError as e:
-            print(FMT_PARSE + "[ElfLoader] Cannot load elf files without PyElfTools package! You can install them using pip install pyelftools!" + FMT_NONE)
+            print(FMT_PARSE + "[ElfLoader] Cannot load elf files without PyElfTools package! You can install them "
+                              "using pip install pyelftools!" + FMT_NONE)
            raise e

+        return self.program
+
    def _read_elf(self, elf: 'ELFFile'):
        if not elf.header.e_machine == 'EM_RISCV':
            raise InvalidElfException("Not a RISC-V elf file!")
        if not elf.header.e_ident.EI_CLASS == 'ELFCLASS32':
            raise InvalidElfException("Only 32bit executables are supported!")

-        self.run_ptr = elf.header.e_entry
-
        from elftools.elf.sections import SymbolTableSection
        for sec in elf.iter_sections():
            if isinstance(sec, SymbolTableSection):
@ -57,29 +67,31 @@ class ElfExecutable:
            if sec.name not in INCLUDE_SEC:
                continue

-            self.add_sec(self._lms_from_elf_sec(sec, 'kernel'))
+            self._add_sec(self._lms_from_elf_sec(sec, self.filename))

    def _lms_from_elf_sec(self, sec: 'Section', owner: str):
        is_code = sec.name in ('.text',)
        data = bytearray(sec.data())
+        if len(data) < sec.data_size:
+            data += bytearray(sec.data_size - len(data))  # zero-pad up to the declared section size
        flags = MemoryFlags(is_code, is_code)
        print(FMT_ELF + "[ElfLoader] Section {} at: {:X}".format(sec.name, sec.header.sh_addr) + FMT_NONE)
-        return ElfLoadedMemorySection(
-            sec.name,
-            sec.header.sh_addr,
-            sec.data_size,
-            data,
-            flags,
-            owner
+        return ElfMemorySection(
+            data, sec.name, self.program.context, owner, sec.header.sh_addr, flags
        )

    def _parse_symtab(self, symtab: 'SymbolTableSection'):
-        self.symbols = {
-            sym.name: sym.entry.st_value for sym in symtab.iter_symbols() if sym.name
-        }
+        for sym in symtab.iter_symbols():
+            if not sym.name:
+                continue
+            self.program.context.labels[sym.name] = sym.entry.st_value
+            # symbols with global binding are additionally exported as global labels
+            if sym.entry.st_info.bind == 'STB_GLOBAL':  # binding lives in st_info, st_shndx is the section index
+                self.program.global_labels.add(sym.name)
+                print(FMT_PARSE + "LOADED GLOBAL SYMBOL {}: {}".format(sym.name, sym.entry.st_value) + FMT_NONE)

-    def add_sec(self, new_sec: 'ElfLoadedMemorySection'):
-        for sec in self.sections:
+    def _add_sec(self, new_sec: 'ElfMemorySection'):
+        for sec in self.program.sections:
            if sec.base < sec.end <= new_sec.base or sec.end > sec.base >= new_sec.end:
                continue
            else:
@ -88,78 +100,4 @@ class ElfExecutable:
                ) + FMT_NONE)
                raise RuntimeError("Cannot load elf with overlapping sections!")

-        self.sections.append(new_sec)
-        self.sections_by_name[new_sec.name] = new_sec
-
-
-class InvalidElfException(RiscemuBaseException):
-    def __init__(self, msg: str):
-        super().__init__()
-        self.msg = msg
-
-    def message(self):
-        return FMT_PARSE + "{}(\"{}\")".format(self.__class__.__name__, self.msg) + FMT_NONE
-
-
-@dataclass(frozen=True)
-class ElfInstruction:
-    name: str
-    args: List[int]
-    encoded: int
-
-    def get_imm(self, num: int) -> int:
-        return self.args[num]
-
-    def get_imm_reg(self, num: int) -> Tuple[int, int]:
-        return self.args[-1], self.args[-2]
-
-    def get_reg(self, num: int) -> str:
-        return RISCV_REGS[self.args[num]]
-
-    def __repr__(self) -> str:
-        if self.name == 'jal' and self.args[0] == 0:
-            return "j       {}".format(self.args[1])
-        if self.name == 'addi' and self.args[2] == 0:
-            return "mv      {}, {}".format(self.get_reg(0), self.get_reg(1))
-        if self.name == 'addi' and self.args[1] == 0:
-            return "li      {}, {}".format(self.get_reg(0), self.args[2])
-        if self.name == 'ret' and len(self.args) == 0:
-            return "ret"
-        return format_ins(self.encoded, self.name)
-        # if self.name in ('lw', 'lh', 'lb', 'lbu', 'lhu', 'sw', 'sh', 'sb'):
-        #     args = "{}, {}({})".format(
-        #         RISCV_REGS[self.args[0]], self.args[2], RISCV_REGS[self.args[1]]
-        #     )
-        # else:
-        #     args = ", ".join(map(str, self.args))
-        # return "{:<8} {}".format(
-        #     self.name,
-        #     args
-        # )
-
-
-class ElfLoadedMemorySection(LoadedMemorySection):
-    ins_cache: List[Optional[ElfInstruction]]
-    """
-    A fast cache for accessing pre-decoded instructions
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.__setattr__('ins_cache', [None] * (self.size // 4))
-
-    def read_instruction(self, offset):
-        if self.ins_cache[offset//4] is not None:
-            return self.ins_cache[offset//4]
-        if not self.flags.executable:
-            print(FMT_PARSE + "Reading instruction from non-executable memory!" + FMT_NONE)
-            raise InstructionAccessFault(offset + self.base)
-        if offset % 4 != 0:
-            raise InstructionAddressMisalignedTrap(offset + self.base)
-        ins = ElfInstruction(*decode(self.content[offset:offset + 4]))
-        self.ins_cache[offset // 4] = ins
-        return ins
-
-    @property
-    def end(self):
-        return self.size + self.base
+        self.program.add_section(new_sec)
--- a/riscemu/priv/Exceptions.py
+++ b/riscemu/priv/Exceptions.py
@ -5,6 +5,9 @@ from .CSRConsts import MCAUSE_TRANSLATION

 import typing

+from .. import RiscemuBaseException
+from ..colors import FMT_PARSE, FMT_NONE
+
 if typing.TYPE_CHECKING:
    from .ElfLoader import ElfInstruction

@ -52,14 +55,17 @@ class CpuTrap(BaseException):
    def mcause(self):
        return (self.interrupt << 31) + self.code

+    def message(self) -> str:
+        return ""
+
    def __repr__(self):
        name = "Reserved interrupt({}, {})".format(self.interrupt, self.code)

        if (self.interrupt, self.code) in MCAUSE_TRANSLATION:
            name = MCAUSE_TRANSLATION[(self.interrupt, self.code)] + "({}, {})".format(self.interrupt, self.code)

-        return "{} {{priv={}, type={}, mtval={:x}}}".format(
-            name, self.priv.name, self.type.name, self.mtval
+        return "{} {{priv={}, type={}, mtval={:x}}} {}".format(
+            name, self.priv.name, self.type.name, self.mtval, self.message()
        )

    def __str__(self):
@ -89,3 +95,29 @@ class TimerInterrupt(CpuTrap):
 class EcallTrap(CpuTrap):
    def __init__(self, mode: PrivModes):
        super().__init__(mode.value + 8, 0, CpuTrapType.EXCEPTION)
+
+
+class InvalidElfException(RiscemuBaseException):
+    def __init__(self, msg: str):
+        super().__init__()
+        self.msg = msg
+
+    def message(self):
+        return FMT_PARSE + "{}(\"{}\")".format(self.__class__.__name__, self.msg) + FMT_NONE
+
+
+class LoadAccessFault(CpuTrap):
+    def __init__(self, msg, addr, size, op):
+        super(LoadAccessFault, self).__init__(5, addr, CpuTrapType.EXCEPTION)
+        self.msg = msg
+        self.addr = addr
+        self.size = size
+        self.op = op
+
+    def message(self):
+        return "(During {} at 0x{:08x} of size {}: {})".format(
+            self.op,
+            self.addr,
+            self.size,
+            self.msg
+        )
--- a/riscemu/priv/ImageLoader.py
+++ b/riscemu/priv/ImageLoader.py
@ -2,124 +2,74 @@
-Laods a memory image with debug information into memory
+Loads a memory image with debug information into memory
 """

-import json
-from functools import lru_cache
-from typing import Dict, List, Optional, TYPE_CHECKING
+import os.path
+from typing import List, Iterable

-from .ElfLoader import ElfInstruction, ElfLoadedMemorySection, InstructionAccessFault, InstructionAddressMisalignedTrap
-from .PrivMMU import PrivMMU
-from ..config import RunConfig
-from ..base_types import LoadedMemorySection, MemoryFlags
-from ..IO.IOModule import IOModule
-from ..colors import FMT_ERROR, FMT_NONE, FMT_MEM
-from ..decoder import decode
-
-if TYPE_CHECKING:
-    pass
+from .ElfLoader import ElfMemorySection
+from .types import MemoryImageDebugInfos
+from ..assembler import INSTRUCTION_SECTION_NAMES
+from ..colors import FMT_NONE, FMT_PARSE
+from ..helpers import get_section_base_name
+from ..types import MemoryFlags, ProgramLoader, Program, T_ParserOpts


-class MemoryImageMMU(PrivMMU):
-    io: List[IOModule]
-    data: bytearray
-    io_start: int
-    debug_info: Dict[str, Dict[str, Dict[str, str]]]
+class MemoryImageLoader(ProgramLoader):

-    def __init__(self, file_name: str, io_start: int = 0xFF0000):
-        super(MemoryImageMMU, self).__init__(conf=RunConfig())
+    @classmethod
+    def can_parse(cls, source_path: str) -> float:
+        if source_path.endswith('.img'):  # match on the full suffix, including the dot
+            return 1
+        return 0

-        with open(file_name, 'rb') as memf:
-            data = memf.read()
-        with open(file_name + '.dbg', 'r') as dbgf:
-            debug_info: Dict = json.load(dbgf)
+    @classmethod
+    def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]:
+        return argv, {}

-        self.data = bytearray(data)
-        # TODO: super wasteful memory allocation happening here
-        if len(data) < io_start:
-            self.data += bytearray(io_start - len(data))
-        self.debug_info = debug_info
-        self.io_start = io_start
-        self.io = list()
+    def parse(self) -> Iterable[Program]:
+        if not self.options.get('debug', False):  # no debug-info path in the options: load the raw image
+            yield self.parse_no_debug()
+            return

-    def get_entrypoint(self):
-        try:
-            start = self.debug_info['symbols']['kernel'].get('_start', None)
-            if start is not None:
-                return start
-            return self.debug_info['symbols']['kernel'].get('_ftext')
-        except KeyError:
-            print(FMT_ERROR + '[MMU] cannot find kernel entry in debug information! Falling back to 0x100' + FMT_NONE)
-            return 0x100
+        with open(self.options.get('debug'), 'r') as debug_file:
+            debug_info = MemoryImageDebugInfos.load(debug_file.read())

-    @lru_cache(maxsize=2048)
-    def read_ins(self, addr: int) -> ElfInstruction:
-        if addr >= self.io_start:
-            raise InstructionAccessFault(addr)
-        if addr % 4 != 0:
-            raise InstructionAddressMisalignedTrap(addr)
+        with open(self.source_path, 'rb') as source_file:
+            data: bytearray = bytearray(source_file.read())

-        return ElfInstruction(*decode(self.data[addr:addr + 4]))
+        for name, sections in debug_info.sections.items():
+            program = Program(name)

-    def read(self, addr: int, size: int) -> bytearray:
-        if addr < 0x100:
-            pc = self.cpu.pc
-            text_sec = self.get_sec_containing(pc)
-            print(FMT_ERROR + "[MMU] possible null dereference (read {:x}) from (pc={:x},sec={},rel={:x})".format(
-                addr, pc, text_sec.owner + ':' + text_sec.name, pc - text_sec.base
-            ) + FMT_NONE)
-        if addr >= self.io_start:
-            return self.io_at(addr).read(addr, size)
-        return self.data[addr: addr + size]
+            for sec_name, (start, size) in sections.items():
+                if program.base is None:
+                    program.base = start

-    def write(self, addr: int, size: int, data):
-        if addr < 0x100:
-            pc = self.cpu.pc
-            text_sec = self.get_sec_containing(pc)
-            print(FMT_ERROR + "[MMU] possible null dereference (write {:x}) from (pc={:x},sec={},rel={:x})".format(
-                addr, pc, text_sec.owner + ':' + text_sec.name, pc - text_sec.base
-            ) + FMT_NONE)
+                in_code_sec = get_section_base_name(sec_name) in INSTRUCTION_SECTION_NAMES
+                program.add_section(
+                    ElfMemorySection(
+                        data[start:start+size], sec_name, program.context,
+                        name, start, MemoryFlags(in_code_sec, in_code_sec)
+                    )
+                )

-        if addr >= self.io_start:
-            return self.io_at(addr).write(addr, data, size)
-        self.data[addr:addr + size] = data[0:size]
+            program.context.labels.update(debug_info.symbols.get(name, dict()))
+            program.global_labels.update(debug_info.globals.get(name, set()))

-    def io_at(self, addr) -> IOModule:
-        for mod in self.io:
-            if mod.contains(addr):
-                return mod
-        raise InstructionAccessFault(addr)
+            yield program

-    def add_io(self, io: IOModule):
-        self.io.append(io)
+    def parse_no_debug(self) -> Program:
+        print(FMT_PARSE + "[MemoryImageLoader] Warning: loading memory image without debug information!" + FMT_NONE)

-    def __repr__(self):
-        return "MemoryImageMMU()"
+        with open(self.source_path, 'rb') as source_file:
+            data: bytes = source_file.read()

-    @lru_cache(maxsize=32)
-    def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]:
-        next_sec = len(self.data)
-        for sec_addr, name in reversed(self.debug_info['sections'].items()):
-            if addr >= int(sec_addr):
-                owner, name = name.split(':')
-                base = int(sec_addr)
-                size = next_sec - base
-                flags = MemoryFlags('.text' in name, '.text' in name)
-                return ElfLoadedMemorySection(name, base, size, self.data[base:next_sec], flags, owner)
-            else:
-                next_sec = int(sec_addr)
+        p = Program(self.filename)
+        p.add_section(ElfMemorySection(
+            bytearray(data), 'memory image contents', p.context, p.name, 0, MemoryFlags(False, True)
+        ))
+        return p

-    def translate_address(self, addr: int):
-        sec = self.get_sec_containing(addr)
-        if sec.name == '.empty':
-            return "<empty>"
-        symbs = self.debug_info['symbols'][sec.owner]
-        for sym, val in reversed(symbs.items()):
-            if addr >= val:
-                return "{}{:+x} ({}:{})".format(sym, addr - val, sec.owner, sec.name)
-        return "{}:{}{:+x}".format(sec.owner, sec.name, addr - sec.base)
-
-    def label(self, symb: str):
-        print(FMT_MEM + "Looking up symbol {}".format(symb))
-        for owner, symbs in self.debug_info['symbols'].items():
-            if symb in symbs:
-                print("  Hit in {}: {} = {}".format(owner, symb, symbs[symb]))
-        print(FMT_NONE, end="")
+    @classmethod
+    def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader':
+        if os.path.exists(source_path + '.dbg'):
+            return MemoryImageLoader(source_path, dict(**options, debug=source_path + '.dbg'))
+        return MemoryImageLoader(source_path, options)
--- a/riscemu/priv/PrivCPU.py
+++ b/riscemu/priv/PrivCPU.py
@ -15,7 +15,7 @@ from ..IO import TextIO
 from ..instructions import RV32A, RV32M

 if typing.TYPE_CHECKING:
-    from riscemu import base_types, LoadedExecutable, LoadedInstruction
+    from riscemu import types, LoadedExecutable, LoadedInstruction
    from riscemu.instructions.InstructionSet import InstructionSet


@ -25,7 +25,7 @@ class PrivCPU(CPU):

    It should support M and U Mode, but no U-Mode Traps.

-    This allows us to
+    This is meant to emulate whole operating systems.
    """

    csr: CSR
@ -44,17 +44,11 @@ class PrivCPU(CPU):
    the equivalent of "1 byte" (this is actually impossible)
    """

-    def __init__(self, conf, mmu: PrivMMU):
+    def __init__(self, conf):
        super().__init__(conf, [PrivRV32I, RV32M, RV32A])
+        # start in machine mode
        self.mode: PrivModes = PrivModes.MACHINE

-        mmu.set_cpu(self)
-        self.pc = mmu.get_entrypoint()
-        self.mmu = mmu
-
-        if hasattr(self.mmu, 'add_io'):
-            self.mmu.add_io(TextIO.TextIO(0xff0000, 64))
-
        self.syscall_int = None
        self.launch_debug = False
        self.pending_traps: List[CpuTrap] = list()
--- a/riscemu/priv/PrivRV32I.py
+++ b/riscemu/priv/PrivRV32I.py
@ -21,7 +21,7 @@ class PrivRV32I(RV32I):
    This is an extension of RV32I, written for the PrivCPU class
    """

-    def instruction_csrrw(self, ins: 'LoadedInstruction'):
+    def instruction_csrrw(self, ins: 'Instruction'):
        rd, rs, csr_addr = self.parse_crs_ins(ins)
        old_val = None
        if rd != 'zero':
@ -34,7 +34,7 @@ class PrivRV32I(RV32I):
        if old_val is not None:
            self.regs.set(rd, old_val)

-    def instruction_csrrs(self, ins: 'LoadedInstruction'):
+    def instruction_csrrs(self, ins: 'Instruction'):
        rd, rs, csr_addr = self.parse_crs_ins(ins)
        if rs != 'zero':
            # oh no, this should not happen!
@ -45,13 +45,13 @@ class PrivRV32I(RV32I):
            self.regs.set(rd, old_val)


-    def instruction_csrrc(self, ins: 'LoadedInstruction'):
+    def instruction_csrrc(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_csrrsi(self, ins: 'LoadedInstruction'):
+    def instruction_csrrsi(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_csrrwi(self, ins: 'LoadedInstruction'):
+    def instruction_csrrwi(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        rd, imm, addr = ins.get_reg(0), ins.get_imm(1), ins.get_imm(2)
        if rd != 'zero':
@ -62,10 +62,10 @@ class PrivRV32I(RV32I):
        self.cpu.csr.set(addr, imm)


-    def instruction_csrrci(self, ins: 'LoadedInstruction'):
+    def instruction_csrrci(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_mret(self, ins: 'LoadedInstruction'):
+    def instruction_mret(self, ins: 'Instruction'):
        if self.cpu.mode != PrivModes.MACHINE:
            print("MRET not inside machine level code!")
            raise IllegalInstructionTrap(ins)
@ -90,53 +90,53 @@ class PrivRV32I(RV32I):
                if self.cpu.conf.verbosity > 1:
                    self.regs.dump_reg_a()

-    def instruction_uret(self, ins: 'LoadedInstruction'):
+    def instruction_uret(self, ins: 'Instruction'):
        raise IllegalInstructionTrap(ins)

-    def instruction_sret(self, ins: 'LoadedInstruction'):
+    def instruction_sret(self, ins: 'Instruction'):
        raise IllegalInstructionTrap(ins)

-    def instruction_scall(self, ins: 'LoadedInstruction'):
+    def instruction_scall(self, ins: 'Instruction'):
        """
        Overwrite the scall from userspace RV32I
        """
        raise EcallTrap(self.cpu.mode)

-    def instruction_beq(self, ins: 'LoadedInstruction'):
+    def instruction_beq(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 == rs2:
            self.pc += dst - 4

-    def instruction_bne(self, ins: 'LoadedInstruction'):
+    def instruction_bne(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 != rs2:
            self.pc += dst - 4

-    def instruction_blt(self, ins: 'LoadedInstruction'):
+    def instruction_blt(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 < rs2:
            self.pc += dst - 4

-    def instruction_bge(self, ins: 'LoadedInstruction'):
+    def instruction_bge(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 >= rs2:
            self.pc += dst - 4

-    def instruction_bltu(self, ins: 'LoadedInstruction'):
+    def instruction_bltu(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
        if rs1 < rs2:
            self.pc += dst - 4

-    def instruction_bgeu(self, ins: 'LoadedInstruction'):
+    def instruction_bgeu(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
        if rs1 >= rs2:
            self.pc += dst - 4

    # technically deprecated
-    def instruction_j(self, ins: 'LoadedInstruction'):
+    def instruction_j(self, ins: 'Instruction'):
        raise NotImplementedError("Should never be reached!")

-    def instruction_jal(self, ins: 'LoadedInstruction'):
+    def instruction_jal(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
        addr = ins.get_imm(1)
@ -148,20 +148,20 @@ class PrivRV32I(RV32I):
        self.regs.set(reg, self.pc)
        self.pc += addr - 4

-    def instruction_jalr(self, ins: 'LoadedInstruction'):
+    def instruction_jalr(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        rd, rs, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(rd, self.pc)
        self.pc = rs + imm - 4

-    def instruction_sbreak(self, ins: 'LoadedInstruction'):
+    def instruction_sbreak(self, ins: 'Instruction'):
        raise LaunchDebuggerException()

-    def parse_crs_ins(self, ins: 'LoadedInstruction'):
+    def parse_crs_ins(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        return ins.get_reg(0), ins.get_reg(1), ins.get_imm(2)

-    def parse_mem_ins(self, ins: 'LoadedInstruction') -> Tuple[str, int]:
+    def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, int]:
        ASSERT_LEN(ins.args, 3)
        addr = self.get_reg_content(ins, 1) + ins.get_imm(2)
        reg = ins.get_reg(0)
--- a/riscemu/priv/types.py
+++ b/riscemu/priv/types.py
@ -0,0 +1,140 @@
+import json
+from collections import defaultdict
+from dataclasses import dataclass
+from functools import lru_cache
+from typing import Tuple, Dict, Set
+
+from riscemu import MemoryAccessException
+from riscemu.priv.Exceptions import InstructionAccessFault, InstructionAddressMisalignedTrap, LoadAccessFault
+from riscemu.types import Instruction, InstructionContext, T_RelativeAddress, MemoryFlags, T_AbsoluteAddress
+from riscemu.base import BinaryDataMemorySection
+from riscemu.colors import FMT_NONE, FMT_PARSE
+from riscemu.decoder import format_ins, RISCV_REGS, decode
+
+
+@dataclass(frozen=True)
+class ElfInstruction(Instruction):
+    name: str
+    args: Tuple[int]
+    encoded: int
+
+    def get_imm(self, num: int) -> int:
+        return self.args[num]
+
+    def get_imm_reg(self, num: int) -> Tuple[int, int]:
+        return self.args[-1], self.args[-2]
+
+    def get_reg(self, num: int) -> str:
+        return RISCV_REGS[self.args[num]]
+
+    def __repr__(self) -> str:
+        if self.name == 'jal' and self.args[0] == 0:
+            return "j       {}".format(self.args[1])
+        if self.name == 'addi' and self.args[2] == 0:
+            return "mv      {}, {}".format(self.get_reg(0), self.get_reg(1))
+        if self.name == 'addi' and self.args[1] == 0:
+            return "li      {}, {}".format(self.get_reg(0), self.args[2])
+        if self.name == 'ret' and len(self.args) == 0:
+            return "ret"
+        return format_ins(self.encoded, self.name)
+
+
+class ElfMemorySection(BinaryDataMemorySection):
+    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int,
+                 flags: MemoryFlags):
+        super().__init__(data, name, context, owner, base=base, flags=flags)
+        self.read_ins = lru_cache()(self.read_ins)  # per-instance decode cache, keyed by offset only
+
+    def read_ins(self, offset):
+        if not self.flags.executable:
+            print(FMT_PARSE + "Reading instruction from non-executable memory!" + FMT_NONE)
+            raise InstructionAccessFault(offset + self.base)
+        if offset % 4 != 0:
+            raise InstructionAddressMisalignedTrap(offset + self.base)
+        return ElfInstruction(*decode(self.data[offset:offset + 4]))
+
+    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
+        if self.flags.read_only:
+            raise LoadAccessFault('read-only section', offset + self.base, size, 'write')
+        self.read_ins.cache_clear()  # decoded instructions may be stale after any write to this section
+        return super(ElfMemorySection, self).write(offset, size, data)
+
+    @property
+    def end(self):
+        return self.size + self.base
+
+
+class MemoryImageDebugInfos:
+    VERSION = '1'
+    """
+    Schema version
+    """
+
+    base: T_AbsoluteAddress = 0
+    """
+    The base address where the image starts. Defaults to zero.
+    """
+
+    sections: Dict[str, Dict[str, Tuple[int, int]]]
+    """
+    This dictionary maps a program and section to (start address, section length)
+    """
+
+    symbols: Dict[str, Dict[str, int]]
+    """
+    This dictionary maps a program and a symbol to a value
+    """
+
+    globals: Dict[str, Set[str]]
+    """
+    This dictionary contains the list of all global symbols of a given program
+    """
+
+    def __init__(self,
+                 sections: Dict[str, Dict[str, Tuple[int, int]]],
+                 symbols: Dict[str, Dict[str, int]],
+                 globals: Dict[str, Set[str]],
+                 base: int = 0
+                 ):
+        self.sections = sections
+        self.symbols = symbols
+        self.globals = globals
+        self.base = base
+
+    def serialize(self) -> str:
+        def serialize(obj: any):  # json "default" hook: must return a serializable object, not JSON text
+            if isinstance(obj, defaultdict):
+                return dict(obj)
+            if isinstance(obj, (set, tuple)):
+                return list(obj)
+            return "<<unserializable {}>>".format(getattr(obj, '__qualname__', '{unknown}'))
+
+        return json.dumps(  # VERSION is emitted because load() refuses input without it
+            dict(VERSION=self.VERSION, sections=self.sections, symbols=self.symbols, globals=self.globals,
+                 base=self.base), default=serialize
+        )
+
+    @classmethod
+    def load(cls, serialized_str: str) -> 'MemoryImageDebugInfos':
+        json_obj: dict = json.loads(serialized_str)
+
+        if 'VERSION' not in json_obj:
+            raise RuntimeError("Unknown MemoryImageDebugInfo version!")
+
+        version: str = json_obj.pop('VERSION')
+
+        # compare major version
+        if str(version).split('.')[0] != cls.VERSION.split('.')[0]:
+            raise RuntimeError(
+                "Unknown MemoryImageDebugInfo version! This emulator expects version {}, debug info version {}".format(
+                    cls.VERSION, version
+                )
+            )
+
+        return MemoryImageDebugInfos(**json_obj)
+
+    @classmethod
+    def builder(cls) -> 'MemoryImageDebugInfos':
+        return MemoryImageDebugInfos(
+            defaultdict(dict), defaultdict(dict), defaultdict(set)
+        )
--- a/riscemu/syscall.py
+++ b/riscemu/syscall.py
@ -18,9 +18,9 @@ if typing.TYPE_CHECKING:
    from . import CPU

 SYSCALLS = {
-    63:   'read',
-    64:   'write',
-    93:   'exit',
+    63: 'read',
+    64: 'write',
+    93: 'exit',
    1024: 'open',
    1025: 'close',
 }
@ -35,6 +35,7 @@ OPEN_MODES = {
 }
 """All available file open modes"""

+
@dataclass(frozen=True)
 class Syscall:
    """
@ -199,4 +200,4 @@ class SyscallInterface:
        return "{}(\n\tfiles={}\n)".format(
            self.__class__.__name__,
            self.open_files
-        )
+        )
--- a/riscemu/tokenizer.py
+++ b/riscemu/tokenizer.py
@ -133,7 +133,3 @@ def split_whitespace_respecting_quotes(line: str) -> Iterable[str]:

    if part:
        yield part
-
-
-
-
--- a/riscemu/types.py
+++ b/riscemu/types.py
@ -1,74 +1,412 @@
-from typing import List, Tuple
-from .exceptions import MemoryAccessException
-from .helpers import parse_numeric_argument
-from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \
-    T_AbsoluteAddress, Program
+"""
+RiscEmu (c) 2021 Anton Lydike
+
+SPDX-License-Identifier: MIT
+
+This file contains abstract base classes and types, bundling only the absolute basic functionality
+
+See base.py for some basic implementations of these classes
+"""
+import os
+import re
+from abc import ABC, abstractmethod
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple, Set, Union, Generator, Iterator, Callable, Type
+
+from . import MMU, InstructionSet
+from .assembler import get_section_base_name
+from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_PARSE, FMT_RED, FMT_BOLD
+from .exceptions import ParseException
+from .helpers import format_bytes
+
+# define some base type aliases so we can keep track of absolute and relative addresses
+T_RelativeAddress = int
+T_AbsoluteAddress = int
+
+# parser options are just dictionaries with arbitrary values
+T_ParserOpts = Dict[str, any]
+
+NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')


-class SimpleInstruction(Instruction):
-    def __init__(self, name: str, args: Tuple[str], context: InstructionContext, addr: T_RelativeAddress):
-        self.context = context
-        self.name = name
-        self.args = args
-        self.addr = addr
+@dataclass(frozen=True)
+class MemoryFlags:
+    read_only: bool
+    executable: bool

+    def __repr__(self):
+        return "r{}{}".format(
+            '-' if self.read_only else 'w',
+            'x' if self.executable else '-'
+        )
+
+
+class InstructionContext:
+    base_address: T_AbsoluteAddress
+    """
+    The address where the instruction block is placed
+    """
+
+    labels: Dict[str, T_RelativeAddress]
+    """
+    This dictionary maps all labels to their relative position of the instruction block
+    """
+
+    numbered_labels: Dict[str, List[T_RelativeAddress]]
+    """
+    This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where 
+    the label was placed 
+    """
+
+    global_symbol_dict: Dict[str, T_AbsoluteAddress]
+    """
+    A reference to the MMU for access to global symbols
+    """
+
+    def __init__(self):
+        self.labels = dict()
+        self.numbered_labels = defaultdict(list)
+        self.base_address = 0
+        self.global_symbol_dict = dict()
+
+    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]:
+        if NUMBER_SYMBOL_PATTERN.match(symbol):
+            if address_at is None:
+                raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))
+
+            direction = symbol[-1]
+            if direction == 'b':
+                return max([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr < address_at],
+                           default=None)
+            else:
+                return min([addr for addr in self.numbered_labels.get(symbol[:-1], []) if addr > address_at],
+                           default=None)
+        else:
+            if symbol not in self.labels:
+                return self.global_symbol_dict.get(symbol, None)
+            value = self.labels.get(symbol, None)
+            if value is None:
+                return value
+            return value + self.base_address
+
+
+class Instruction(ABC):
+    name: str
+    args: tuple
+
+    @abstractmethod
    def get_imm(self, num: int) -> int:
-        resolved_label = self.context.resolve_label(self.args[num], self.addr)
-        if resolved_label is None:
-            return parse_numeric_argument(self.args[num])
-        return resolved_label
+        """
+        parse and get immediate argument
+        """
+        pass

+    @abstractmethod
    def get_imm_reg(self, num: int) -> Tuple[int, str]:
-        return self.get_imm(num + 1), self.get_reg(num)
+        """
+        parse and get an argument imm(reg)
+        """
+        pass

+    @abstractmethod
    def get_reg(self, num: int) -> str:
-        return self.args[num]
+        """
+        parse and get a register argument
+        """
+        pass
+
+    def __repr__(self):
+        return "{} {}".format(self.name, ", ".join(map(str, self.args)))  # args may hold ints, not only strs


-class InstructionMemorySection(MemorySection):
-    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: Program, base: int = 0):
-        self.name = name
-        self.base = base
-        self.context = context
-        self.size = len(instructions) * 4
-        self.flags = MemoryFlags(True, True)
-        self.instructions = instructions
-        self.owner = owner.name
+@dataclass
+class MemorySection(ABC):
+    name: str
+    flags: MemoryFlags
+    size: int
+    base: T_AbsoluteAddress
+    owner: str
+    context: InstructionContext

+    @property
+    def end(self):
+        return self.base + self.size
+
+    @abstractmethod
    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
-        raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')
+        pass

+    @abstractmethod
    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
-        raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write')
+        pass

+    @abstractmethod
    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
-        if offset % 4 != 0:
-            raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')
-        return self.instructions[offset // 4]
+        pass
+
+    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
+             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
+        """
+        Print the contents of this section to stdout.
+
+        Executable sections are listed one instruction per line (using read_ins), all other
+        sections are printed as rows of bytes (using read and format_bytes).
+
+        :param start: section-relative offset to start at. If end is None, this offset is
+                      highlighted instead and the view is placed around it
+        :param end: section-relative offset to stop at (exclusive). If None, up to rows // 2 rows
+                    before and after start are shown
+        :param fmt: format name, passed on to format_bytes
+        :param bytes_per_row: number of bytes per printed row (always 4 for executable sections)
+        :param rows: number of rows to show when end is None
+        :param group: number of bytes per group, passed on to format_bytes
+        """
+        if self.flags.executable:
+            bytes_per_row = 4
+        highlight = None
+        if end is None:
+            # no explicit range: show a window around start and highlight start itself
+            end = min(start + (bytes_per_row * (rows // 2)), self.size - 1)
+            highlight = start
+            start = max(0, start - (bytes_per_row * (rows // 2)))
+
+        if self.flags.executable:
+            print(FMT_MEM + "{}, viewing {} instructions:".format(
+                self, (end - start) // 4
+            ) + FMT_NONE)
+
+            for addr in range(start, end, 4):
+                if addr == highlight:
+                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
+                print("0x{:04x}: {}{}".format(
+                    self.base + addr, self.read_ins(addr), FMT_NONE
+                ))
+            return
+
+        print(FMT_MEM + "{}, viewing {} bytes:".format(
+            self, (end - start)
+        ) + FMT_NONE)
+
+        # Walk the range row by row and clamp the last row to end. This does not rely on start
+        # being a multiple of bytes_per_row, so no row reads past end and no bytes are printed twice.
+        for addr in range(start, end, bytes_per_row):
+            row_len = min(bytes_per_row, end - addr)
+            if highlight is not None and addr <= highlight < addr + row_len:
+                hi_ind = (highlight - addr) // group
+            else:
+                # -1 tells format_bytes that nothing in this row is highlighted
+                hi_ind = -1
+            print("0x{:04x}: {}{}".format(
+                self.base + addr, format_bytes(self.read(addr, row_len), fmt, group, hi_ind), FMT_NONE
+            ))
+
+    def dump_all(self, *args, **kwargs):
+        """
+        Dump the whole section, from offset 0 up to self.size.
+
+        All further arguments are forwarded unchanged to dump, after start and end.
+        """
+        self.dump(0, self.size, *args, **kwargs)
+
+    def __repr__(self):
+        """
+        Returns a one-line summary of the section: class name, section name, base address
+        (as eight hex digits), size, flags and owner.
+        """
+        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
+            self.__class__.__name__,
+            self.name,
+            self.base,
+            self.size,
+            self.flags,
+            self.owner
+        )


-class BinaryDataMemorySection(MemorySection):
-    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: Program, base: int = 0):
+class Program:
+    """
+    This represents a collection of sections which together form an executable program
+
+    When you want to create a program which can be located anywhere in memory, set base to None,
+    this signals the other components, that this is relocatable. Set the base of each section to
+    the offset in the program, and everything will be taken care of for you.
+
+    When base is set, the section bases are absolute addresses (at or after base).
+    """
+    name: str
+    context: InstructionContext
+    global_labels: Set[str]
+    sections: List[MemorySection]
+    base: Optional[T_AbsoluteAddress]
+    is_loaded: bool
+
+    @property
+    def size(self):
+        """
+        The number of bytes between the start of the program and the end of its last section
+        (0 if the program has no sections).
+        """
+        if not self.sections:
+            return 0
+        # sections are kept sorted by base, so the last one ends furthest out
+        last = self.sections[-1]
+        if self.base is None:
+            return last.base + last.size
+        return (last.base - self.base) + last.size
+
+    def __init__(self, name: str, base: Optional[int] = None):
+        """
+        :param name: the name of the program
+        :param base: the address the program expects to be loaded at, or None if it is relocatable
+        """
        self.name = name
+        self.context = InstructionContext()
+        self.sections = []
+        self.global_labels = set()
        self.base = base
-        self.context = context
-        self.size = len(data)
-        self.flags = MemoryFlags(False, False)
-        self.data = data
-        self.owner = owner.name
+        # must be named is_loaded: this is the attribute declared above and read by loaded_trigger
+        self.is_loaded = False

-    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
-        if offset + size > self.size:
-            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read')
-        return self.data[offset:offset + size]
+    def add_section(self, sec: MemorySection):
+        """
+        Add a section to the program, keeping the section list sorted by base address.
+
+        :param sec: the section to add
+        """
+        # print a warning when a section is located before the programs base
+        if self.base is not None and sec.base < self.base:
+            print(FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format(
+                sec, self.name, self.base
+            ) + FMT_NONE)

-    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
-        if offset + size > self.size:
-            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write')
-        if len(data[0:size]) != size:
-            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
-        self.data[offset:offset + size] = data[0:size]
+        self.sections.append(sec)
+        # keep section list ordered
+        self.sections.sort(key=lambda section: section.base)

-    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
-        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
-                                    offset, 4, 'instruction fetch')
+    def __repr__(self):
+        """
+        Returns a summary of the program: name, global labels, section names and base.
+        """
+        return "{}(name={},globals={},sections={},base={})".format(
+            self.__class__.__name__, self.name, self.global_labels,
+            [s.name for s in self.sections], self.base
+        )
+
+    @property
+    def entrypoint(self):
+        """
+        The address execution should start at.
+
+        This is the '_start' label if present, otherwise the 'main' label, otherwise the start of
+        the first executable '.text' section.
+
+        :return: the entrypoint address, or None if none of the above exists
+        """
+        base = 0 if self.base is None else self.base
+        # NOTE(review): labels are presumably stored relative to the program base -- confirm
+        for label in ('_start', 'main'):
+            if label in self.context.labels:
+                return base + self.context.labels[label]
+        for sec in self.sections:
+            if get_section_base_name(sec.name) == '.text' and sec.flags.executable:
+                # Section bases are already absolute when the program has a base (see size and
+                # add_section) and program-relative otherwise, so the base must not be added again.
+                return sec.base
+        return None
+
+    def loaded_trigger(self, at_addr: T_AbsoluteAddress):
+        """
+        This trigger is called when the binary is loaded and its final address in memory is determined
+
+        This will do a small sanity check to prevent programs loading twice, or at addresses they don't
+        expect to be loaded.
+
+        :param at_addr: the address where the program will be located
+        :raises RuntimeError: if the program was already loaded at a different address
+        """
+        if self.is_loaded:
+            if at_addr != self.base:
+                raise RuntimeError("Program loaded twice at different addresses! This will probably break things!")
+            return
+
+        if self.base is not None and self.base != at_addr:
+            print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, '
+                            'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE)
+
+        # A relocatable program (base is None) stores program-relative section bases, so its
+        # sections move by the full load address. A program with a fixed base only moves by the
+        # difference between the expected and the actual address.
+        offset = at_addr if self.base is None else at_addr - self.base
+        if offset != 0:
+            for sec in self.sections:
+                sec.base += offset
+
+        self.base = at_addr
+        self.context.base_address = at_addr
+        # remember that loading happened, so the sanity check above works on the next call
+        self.is_loaded = True
+
+
+class ProgramLoader(ABC):
+    """
+    A program loader is always specific to a given source file. It is a place to store all state
+    concerning the parsing and loading of that specific source file, including options.
+    """
+
+    def __init__(self, source_path: str, options: T_ParserOpts):
+        """
+        :param source_path: the path of the source file this loader is responsible for
+        :param options: the parser options for this source file
+        """
+        self.source_path = source_path
+        self.options = options
+        # the last path component, i.e. the file name without its directory
+        self.filename = os.path.basename(self.source_path)
+
+    @classmethod
+    @abstractmethod
+    def can_parse(cls, source_path: str) -> float:
+        """
+        Return confidence that the file located at source_path
+        should be parsed and loaded by this loader
+        :param source_path: the path of the source file
+        :return: the confidence that this file belongs to this parser
+        """
+        pass
+
+    # NOTE(review): the return annotation is a list literal, not a type.
+    # Tuple[List[str], T_ParserOpts] is probably what is meant -- confirm that Tuple is imported.
+    @classmethod
+    @abstractmethod
+    def get_options(cls, argv: List[str]) -> [List[str], T_ParserOpts]:
+        """
+        parse command line args into an options dictionary
+
+        :param argv: the command line args list
+        :return: all remaining command line args and the parser options object
+        """
+        pass
+
+    @classmethod
+    def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader':
+        """
+        Instantiate a loader for the given source file with the required arguments
+
+        :param source_path: the path to the source file
+        :param options: the parsed options (guaranteed to come from this class's get_options method)
+        :return: An instance of a ProgramLoader for the specified source
+        """
+        return cls(source_path, options)
+
+    @abstractmethod
+    def parse(self) -> Union[Program, Iterator[Program]]:
+        """
+        Parse the source file this loader was created for
+
+        :return: either a single program, or an iterator over programs
+        """
+        pass
+
+
+class CPU(ABC):
+    """
+    Base class for CPUs. It holds the MMU, the program counter, the cycle counter and the
+    instruction handlers collected from all given instruction sets.
+    """
+    # static cpu configuration
+    INS_XLEN: int = 4
+
+    # housekeeping variables
+    mmu: MMU
+    pc: T_AbsoluteAddress
+    cycle: int
+    halted: bool
+
+    # debugging context
+    debugger_active: bool
+
+    # instruction information
+    instructions: Dict[str, Callable[[Instruction], None]]
+    instruction_sets: Set[InstructionSet]
+
+    def __init__(self, mmu: MMU, instruction_sets: List[Type[InstructionSet]]):
+        """
+        :param mmu: the MMU this CPU uses
+        :param instruction_sets: the instruction set classes to load. Each class is instantiated
+                                 with this CPU, and the handlers it returns from load() are registered
+        """
+        self.mmu = mmu
+
+        self.instruction_sets = set()
+        self.instructions = {}
+
+        for set_class in instruction_sets:
+            ins_set = set_class(self)
+            self.instructions.update(ins_set.load())
+            self.instruction_sets.add(ins_set)
+
+        # halted is read by __repr__, so it must exist as soon as the CPU is constructed
+        self.halted = False
+        self.cycle = 0
+        self.pc = 0
+        self.debugger_active = False
+
+        self.sections = []
+        self.programs = []
+
+    def run_instruction(self, ins: Instruction):
+        """
+        Execute a single instruction
+
+        :param ins: The instruction to execute
+        :raises RuntimeError: if no handler is registered for the name of the instruction
+        """
+        handler = self.instructions.get(ins.name)
+        if handler is None:
+            # this should never be reached, as unknown instructions are imparseable
+            raise RuntimeError("Unknown instruction: {}".format(ins))
+        handler(ins)
+
+    def load_program(self, program: Program):
+        """
+        Load a program by handing it to the MMU
+
+        :param program: the program to load
+        """
+        self.mmu.load_program(program)
+
+    def __repr__(self):
+        """
+        Returns a representation of the CPU and some of its state.
+        """
+        return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format(
+            self.__class__.__name__,
+            self.pc,
+            self.cycle,
+            self.halted,
+            " ".join(s.name for s in self.instruction_sets)
+        )