finished basic RISC-V parser

2022-01-18 21:08:07 +01:00 · 2022-01-18 21:08:07 +01:00 · 0488a9d6bc
commit 0488a9d6bc
parent dc4dca6fea
16 changed files with 260 additions and 120 deletions
--- a/.idea/riscemu.iml
+++ b/.idea/riscemu.iml
@ -2,6 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/test" isTestSource="true" />
      <excludeFolder url="file://$MODULE_DIR$/venv" />
    </content>
    <orderEntry type="inheritedJdk" />
--- a/riscemu/init.py
+++ b/riscemu/init.py
@ -11,7 +11,7 @@ It contains everything needed to run assembly files, so you don't need any custo
 from .exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \
    ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException

-from .base_types import Executable, LoadedExecutable, LoadedMemorySection
+#from .base_types import Executable, LoadedExecutable, LoadedMemorySection

 from .instructions import *

@ -22,6 +22,8 @@ from .CPU import CPU

 from .config import RunConfig

+from .parser import tokenize, parse_tokens, parse_program_from_file
+
 __author__ = "Anton Lydike <Anton@Lydike.com>"
 __copyright__ = "Copyright 2021 Anton Lydike"
 __version__ = '1.0.0'
--- a/riscemu/assembler.py
+++ b/riscemu/assembler.py
@ -1,16 +1,14 @@
-from typing import Optional, Tuple, Union
+from typing import Optional, Tuple, Union, List
 from enum import Enum, auto
 from typing import Optional, Tuple, Union

-from helpers import parse_numeric_argument
-from .base_types import Program, T_RelativeAddress, InstructionContext
+from .helpers import parse_numeric_argument, align_addr, int_to_bytes
+from .base_types import Program, T_RelativeAddress, InstructionContext, Instruction
 from .colors import FMT_PARSE, FMT_NONE
-from .exceptions import ParseException
-from .helpers import ASSERT_LEN
+from .exceptions import ParseException, ASSERT_LEN, ASSERT_NOT_NULL
 from .tokenizer import Token
 from .types import BinaryDataMemorySection, InstructionMemorySection

-
 INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini')


@ -21,13 +19,25 @@ class MemorySectionType(Enum):

 class CurrentSection:
    name: str
-    data: Union[list, bytearray]
+    data: Union[List[Instruction], bytearray]
    type: MemorySectionType
+    base: int
+
+    def __init__(self, name: str, type: MemorySectionType, base: int = 0):
+        """
+        Create an empty section.
+
+        :param name: the section name
+        :param type: decides the container used for data: a bytearray for data sections,
+                     a list for instruction sections
+        :param base: offset added to every address reported by current_address()
+        :raises ParseException: if type is neither Data nor Instructions
+        """
+        self.name, self.type, self.base = name, type, base
+        if type == MemorySectionType.Instructions:
+            self.data = []
+        elif type == MemorySectionType.Data:
+            self.data = bytearray()
+        else:
+            raise ParseException("Unknown section type: {}".format(type))

    def current_address(self) -> T_RelativeAddress:
        if self.type == MemorySectionType.Data:
-            return len(self.data)
-        return len(self.data) * 4
+            return len(self.data) + self.base
+        return len(self.data) * 4 + self.base

    def __repr__(self):
        return "{}(name={},data={},type={})".format(
@ -47,18 +57,27 @@ class ParseContext:
        self.section = None

    def finalize(self) -> Program:
-        self.finalize_section()
+        self._finalize_section()
        return self.program

-    def finalize_section(self):
+    def _finalize_section(self):
        if self.section is None:
            return
        if self.section.type == MemorySectionType.Data:
-            section = BinaryDataMemorySection(self.section.data, self.section.name, self.context)
+            section = BinaryDataMemorySection(self.section.data, self.section.name, self.context, self.program)
            self.program.add_section(section)
        elif self.section.type == MemorySectionType.Instructions:
-            section = InstructionMemorySection(self.section.data, self.section.name, self.context)
+            section = InstructionMemorySection(self.section.data, self.section.name, self.context, self.program)
            self.program.add_section(section)
+        self.section = None
+
+    def new_section(self, name: str, type: MemorySectionType):
+        """
+        Finalize the section currently being built (if any) and start a new, empty one.
+
+        The base of the new section is the current section's end address passed through
+        align_addr(..., 4), or 0 when no section is open.
+
+        :param name: name of the new section
+        :param type: whether the new section holds data or instructions
+        """
+        base = 0
+        if self.section is not None:
+            # must be read before _finalize_section() resets self.section to None
+            base = align_addr(self.section.current_address(), 4)
+        self._finalize_section()
+        self.section = CurrentSection(name, type, base)

    def __repr__(self):
        return "{}(\n\tsetion={},\n\tprogram={}\n)".format(
@ -100,21 +119,20 @@ class AssemblerDirectives:
    @classmethod
    def op_section(cls, token: Token, args: Tuple[str], context: ParseContext):
        ASSERT_LEN(args, 1)
-        context.finalize_section()
-
        if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES:
-            context.section.type = MemorySectionType.Instructions
-            context.section.data = list()
+            context.new_section(args[0], MemorySectionType.Instructions)
        else:
-            context.section.type = MemorySectionType.Data
-            context.section.data = bytearray()
-        context.section.name = args[0]
+            context.new_section(args[0], MemorySectionType.Data)

    @classmethod
    def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext):
        ASSERT_LEN(args, 1)
        context.program.global_labels.add(args[0])

+    @classmethod
+    def op_global(cls, token: Token, args: Tuple[str], context: ParseContext):
+        """Alias of op_globl: forwards all arguments to it unchanged."""
+        cls.op_globl(token, args, context)
+
    @classmethod
    def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext):
        ASSERT_LEN(args, 2)
@ -122,6 +140,14 @@ class AssemblerDirectives:
        value = parse_numeric_argument(args[1])
        context.context.labels[name] = value

+    @classmethod
+    def op_space(cls, token: Token, args: Tuple[str], context: ParseContext):
+        """
+        Reserve a zero-filled block in the current data section.
+
+        Takes a single argument, the block size, which is parsed with parse_numeric_argument.
+        """
+        ASSERT_LEN(args, 1)
+        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
+
+        size = parse_numeric_argument(args[0])
+        # content=None makes add_bytes append bytearray(size), i.e. `size` zero bytes
+        cls.add_bytes(size, None, context)
+
    @classmethod
    def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext):
        ASSERT_LEN(args, 1)
@ -130,11 +156,14 @@ class AssemblerDirectives:
        cls.add_bytes(size, bytearray(size), context)

    @classmethod
-    def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext):
+    def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext, unsigned=False):
+        """
+        Append bytes to the current data section.
+
+        :param size: number of bytes to emit when content is None or an int
+        :param content: None to emit size zero bytes, an int to be encoded with int_to_bytes,
+                        or a bytearray that is appended as-is
+        :param context: the parse context whose current section receives the bytes
+        :param unsigned: forwarded to int_to_bytes when content is an int
+        """
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)

        if content is None:
            content = bytearray(size)
+        # the value to convert is `content`; `context` is the ParseContext and is never an int
+        if isinstance(content, int):
+            content = int_to_bytes(content, size, unsigned)
+        context.section.data += content

    @classmethod
    def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True):
--- a/riscemu/base_types.py
+++ b/riscemu/base_types.py
@ -7,12 +7,14 @@ This file contains base classes which represent loaded programs
 """

 import re
-from abc import ABC
+from abc import ABC, abstractmethod
+from collections import defaultdict
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple, Set
-from collections import defaultdict

-from .helpers import *
+from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE
+from .exceptions import ParseException
+from .helpers import format_bytes

 T_RelativeAddress = int
 T_AbsoluteAddress = int
--- a/riscemu/exceptions.py
+++ b/riscemu/exceptions.py
@ -4,8 +4,6 @@ RiscEmu (c) 2021 Anton Lydike
 SPDX-License-Identifier: MIT
 """

-import typing
-
 from abc import abstractmethod
 from .base_types import Instruction
 from .colors import *
@ -21,7 +19,7 @@ class RiscemuBaseException(BaseException):

 class ParseException(RiscemuBaseException):
    def __init__(self, msg, data=None):
-        super().__init__()
+        super().__init__(msg, data)
        self.msg = msg
        self.data = data

--- a/riscemu/helpers.py
+++ b/riscemu/helpers.py
@ -6,7 +6,7 @@ SPDX-License-Identifier: MIT

 from math import log10, ceil
 from .exceptions import *
-from typing import Iterable, Iterator, TypeVar, Generic, List
+from typing import Iterable, Iterator, TypeVar, Generic, List, Optional


 def align_addr(addr: int, to_bytes: int = 8) -> int:
@ -124,7 +124,7 @@ class Peekable(Generic[T], Iterator[T]):
            return self.cache.pop()
        return next(self.iterable)

-    def peek(self) -> T:
+    def peek(self) -> Optional[T]:
        try:
            if self.cache:
                return self.cache[0]
--- a/riscemu/instructions/InstructionSet.py
+++ b/riscemu/instructions/InstructionSet.py
@ -8,8 +8,9 @@ from typing import Tuple, Callable, Dict

 from abc import ABC
 from ..CPU import CPU
-from ..helpers import ASSERT_LEN, ASSERT_IN, to_unsigned
-from ..base_types import LoadedInstruction
+from ..helpers import to_unsigned
+from ..exceptions import ASSERT_LEN, ASSERT_IN
+from ..base_types import Instruction


 class InstructionSet(ABC):
@ -30,7 +31,7 @@ class InstructionSet(ABC):
        self.name = self.__class__.__name__
        self.cpu = cpu

-    def load(self) -> Dict[str, Callable[['LoadedInstruction'], None]]:
+    def load(self) -> Dict[str, Callable[['Instruction'], None]]:
        """
        This is called by the CPU once it instantiates this instruction set

@ -51,7 +52,7 @@ class InstructionSet(ABC):
            if member.startswith('instruction_'):
                yield member[12:].replace('_', '.'), getattr(self, member)

-    def parse_mem_ins(self, ins: 'LoadedInstruction') -> Tuple[str, int]:
+    def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, int]:
        """
        parses both rd, rs, imm and rd, imm(rs) argument format and returns (rd, imm+rs1)
        (so a register and address tuple for memory instructions)
@ -69,7 +70,7 @@ class InstructionSet(ABC):
        rd = ins.get_reg(0)
        return rd, rs + imm

-    def parse_rd_rs_rs(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]:
+    def parse_rd_rs_rs(self, ins: 'Instruction', signed=True) -> Tuple[str, int, int]:
        """
        Assumes the command is in <name> rd, rs1, rs2 format
        Returns the name of rd, and the values in rs1 and rs2
@ -84,7 +85,7 @@ class InstructionSet(ABC):
                   to_unsigned(self.get_reg_content(ins, 1)), \
                   to_unsigned(self.get_reg_content(ins, 2))

-    def parse_rd_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]:
+    def parse_rd_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[str, int, int]:
        """
        Assumes the command is in <name> rd, rs, imm format
        Returns the name of rd, the value in rs and the immediate imm
@ -99,7 +100,7 @@ class InstructionSet(ABC):
                   to_unsigned(self.get_reg_content(ins, 1)), \
                   to_unsigned(ins.get_imm(2))

-    def parse_rs_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[int, int, int]:
+    def parse_rs_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[int, int, int]:
        """
        Assumes the command is in <name> rs1, rs2, imm format
        Returns the values in rs1, rs2 and the immediate imm
@ -113,7 +114,7 @@ class InstructionSet(ABC):
                   to_unsigned(self.get_reg_content(ins, 1)), \
                   to_unsigned(ins.get_imm(2))

-    def get_reg_content(self, ins: 'LoadedInstruction', ind: int) -> int:
+    def get_reg_content(self, ins: 'Instruction', ind: int) -> int:
        """
        get the register name from ins and then return the register contents
        """
--- a/riscemu/instructions/RV32A.py
+++ b/riscemu/instructions/RV32A.py
@ -1,4 +1,4 @@
-from .InstructionSet import InstructionSet, LoadedInstruction
+from .InstructionSet import InstructionSet, Instruction
 from ..exceptions import INS_NOT_IMPLEMENTED
 from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed

@ -10,13 +10,13 @@ class RV32A(InstructionSet):
    for this?
    """

-    def instruction_lr_w(self, ins: 'LoadedInstruction'):
+    def instruction_lr_w(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_sc_w(self, ins: 'LoadedInstruction'):
+    def instruction_sc_w(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_amoswap_w(self, ins: 'LoadedInstruction'):
+    def instruction_amoswap_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
        if dest == 'zero':
            self.mmu.write(addr, int_to_bytes(addr, 4))
@ -25,37 +25,37 @@ class RV32A(InstructionSet):
            self.mmu.write(addr, int_to_bytes(val, 4))
            self.regs.set(dest, old)

-    def instruction_amoadd_w(self, ins: 'LoadedInstruction'):
+    def instruction_amoadd_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
        old = int_from_bytes(self.mmu.read(addr, 4))
        self.mmu.write(addr, int_to_bytes(old + val, 4))
        self.regs.set(dest, old)

-    def instruction_amoand_w(self, ins: 'LoadedInstruction'):
+    def instruction_amoand_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
        old = int_from_bytes(self.mmu.read(addr, 4))
        self.mmu.write(addr, int_to_bytes(old & val, 4))
        self.regs.set(dest, old)

-    def instruction_amoor_w(self, ins: 'LoadedInstruction'):
+    def instruction_amoor_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
        old = int_from_bytes(self.mmu.read(addr, 4))
        self.mmu.write(addr, int_to_bytes(old | val, 4))
        self.regs.set(dest, old)

-    def instruction_amoxor_w(self, ins: 'LoadedInstruction'):
+    def instruction_amoxor_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
        old = int_from_bytes(self.mmu.read(addr, 4))
        self.mmu.write(addr, int_to_bytes(old ^ val, 4))
        self.regs.set(dest, old)

-    def instruction_amomax_w(self, ins: 'LoadedInstruction'):
+    def instruction_amomax_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
        old = int_from_bytes(self.mmu.read(addr, 4))
        self.mmu.write(addr, int_to_bytes(max(old, val), 4))
        self.regs.set(dest, old)

-    def instruction_amomaxu_w(self, ins: 'LoadedInstruction'):
+    def instruction_amomaxu_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
        val = to_unsigned(val)
        old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True)
@ -63,13 +63,13 @@ class RV32A(InstructionSet):
        self.mmu.write(addr, int_to_bytes(to_signed(max(old, val)), 4))
        self.regs.set(dest, old)

-    def instruction_amomin_w(self, ins: 'LoadedInstruction'):
+    def instruction_amomin_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
        old = int_from_bytes(self.mmu.read(addr, 4))
        self.mmu.write(addr, int_to_bytes(min(old, val), 4))
        self.regs.set(dest, old)

-    def instruction_amominu_w(self, ins: 'LoadedInstruction'):
+    def instruction_amominu_w(self, ins: 'Instruction'):
        dest, addr, val = self.parse_rd_rs_rs(ins)
        val = to_unsigned(val)
        old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True)
--- a/riscemu/instructions/RV32I.py
+++ b/riscemu/instructions/RV32I.py
@ -11,7 +11,7 @@ from ..colors import FMT_DEBUG, FMT_NONE
 from ..debug import launch_debug_session
 from ..exceptions import LaunchDebuggerException
 from ..syscall import Syscall
-from ..base_types import LoadedInstruction
+from ..base_types import Instruction


 class RV32I(InstructionSet):
@ -23,39 +23,39 @@ class RV32I(InstructionSet):
    See https://maxvytech.com/images/RV32I-11-2018.pdf for a more detailed overview
    """

-    def instruction_lb(self, ins: 'LoadedInstruction'):
+    def instruction_lb(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
        self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1)))

-    def instruction_lh(self, ins: 'LoadedInstruction'):
+    def instruction_lh(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
        self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2)))

-    def instruction_lw(self, ins: 'LoadedInstruction'):
+    def instruction_lw(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
        self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 4)))

-    def instruction_lbu(self, ins: 'LoadedInstruction'):
+    def instruction_lbu(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
        self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1), unsigned=True))

-    def instruction_lhu(self, ins: 'LoadedInstruction'):
+    def instruction_lhu(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
        self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2), unsigned=True))

-    def instruction_sb(self, ins: 'LoadedInstruction'):
+    def instruction_sb(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
        self.mmu.write(addr, 1, int_to_bytes(self.regs.get(rd), 1))

-    def instruction_sh(self, ins: 'LoadedInstruction'):
+    def instruction_sh(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
        self.mmu.write(addr, 2, int_to_bytes(self.regs.get(rd), 2))

-    def instruction_sw(self, ins: 'LoadedInstruction'):
+    def instruction_sw(self, ins: 'Instruction'):
        rd, addr = self.parse_mem_ins(ins)
        self.mmu.write(addr, 4, int_to_bytes(self.regs.get(rd), 4))

-    def instruction_sll(self, ins: 'LoadedInstruction'):
+    def instruction_sll(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
@ -65,7 +65,7 @@ class RV32I(InstructionSet):
            to_signed(to_unsigned(self.regs.get(src1)) << (self.regs.get(src2) & 0b11111))
        )

-    def instruction_slli(self, ins: 'LoadedInstruction'):
+    def instruction_slli(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
@ -75,7 +75,7 @@ class RV32I(InstructionSet):
            to_signed(to_unsigned(self.regs.get(src1)) << (imm & 0b11111))
        )

-    def instruction_srl(self, ins: 'LoadedInstruction'):
+    def instruction_srl(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
@ -85,7 +85,7 @@ class RV32I(InstructionSet):
            to_signed(to_unsigned(self.regs.get(src1)) >> (self.regs.get(src2) & 0b11111))
        )

-    def instruction_srli(self, ins: 'LoadedInstruction'):
+    def instruction_srli(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
@ -95,7 +95,7 @@ class RV32I(InstructionSet):
            to_signed(to_unsigned(self.regs.get(src1)) >> (imm & 0b11111))
        )

-    def instruction_sra(self, ins: 'LoadedInstruction'):
+    def instruction_sra(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
@ -105,7 +105,7 @@ class RV32I(InstructionSet):
            self.regs.get(src1) >> (self.regs.get(src2) & 0b11111)
        )

-    def instruction_srai(self, ins: 'LoadedInstruction'):
+    def instruction_srai(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 3)
        dst = ins.get_reg(0)
        src1 = ins.get_reg(1)
@ -115,7 +115,7 @@ class RV32I(InstructionSet):
            self.regs.get(src1) >> (imm & 0b11111)
        )

-    def instruction_add(self, ins: 'LoadedInstruction'):
+    def instruction_add(self, ins: 'Instruction'):
        dst = ""
        if self.cpu.conf.add_accept_imm:
            try:
@ -130,139 +130,139 @@ class RV32I(InstructionSet):
            rs1 + rs2
        )

-    def instruction_addi(self, ins: 'LoadedInstruction'):
+    def instruction_addi(self, ins: 'Instruction'):
        dst, rs1, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(
            dst,
            rs1 + imm
        )

-    def instruction_sub(self, ins: 'LoadedInstruction'):
+    def instruction_sub(self, ins: 'Instruction'):
        dst, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            dst,
            rs1 - rs2
        )

-    def instruction_lui(self, ins: 'LoadedInstruction'):
+    def instruction_lui(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
        imm = ins.get_imm(1)
        self.regs.set(reg, imm << 12)

-    def instruction_auipc(self, ins: 'LoadedInstruction'):
+    def instruction_auipc(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
        imm = to_unsigned(ins.get_imm(1))
        self.regs.set(reg, self.pc + (imm << 12))

-    def instruction_xor(self, ins: 'LoadedInstruction'):
+    def instruction_xor(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 ^ rs2
        )

-    def instruction_xori(self, ins: 'LoadedInstruction'):
+    def instruction_xori(self, ins: 'Instruction'):
        rd, rs1, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(
            rd,
            rs1 ^ imm
        )

-    def instruction_or(self, ins: 'LoadedInstruction'):
+    def instruction_or(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 | rs2
        )

-    def instruction_ori(self, ins: 'LoadedInstruction'):
+    def instruction_ori(self, ins: 'Instruction'):
        rd, rs1, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(
            rd,
            rs1 | imm
        )

-    def instruction_and(self, ins: 'LoadedInstruction'):
+    def instruction_and(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 & rs2
        )

-    def instruction_andi(self, ins: 'LoadedInstruction'):
+    def instruction_andi(self, ins: 'Instruction'):
        rd, rs1, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(
            rd,
            rs1 & imm
        )

-    def instruction_slt(self, ins: 'LoadedInstruction'):
+    def instruction_slt(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            int(rs1 < rs2)
        )

-    def instruction_slti(self, ins: 'LoadedInstruction'):
+    def instruction_slti(self, ins: 'Instruction'):
        rd, rs1, imm = self.parse_rd_rs_imm(ins)
        self.regs.set(
            rd,
            int(rs1 < imm)
        )

-    def instruction_sltu(self, ins: 'LoadedInstruction'):
+    def instruction_sltu(self, ins: 'Instruction'):
        dst, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
        self.regs.set(
            dst,
            int(rs1 < rs2)
        )

-    def instruction_sltiu(self, ins: 'LoadedInstruction'):
+    def instruction_sltiu(self, ins: 'Instruction'):
        dst, rs1, imm = self.parse_rd_rs_imm(ins, signed=False)
        self.regs.set(
            dst,
            int(rs1 < imm)
        )

-    def instruction_beq(self, ins: 'LoadedInstruction'):
+    def instruction_beq(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 == rs2:
            self.pc = dst

-    def instruction_bne(self, ins: 'LoadedInstruction'):
+    def instruction_bne(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 != rs2:
            self.pc = dst

-    def instruction_blt(self, ins: 'LoadedInstruction'):
+    def instruction_blt(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 < rs2:
            self.pc = dst

-    def instruction_bge(self, ins: 'LoadedInstruction'):
+    def instruction_bge(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins)
        if rs1 >= rs2:
            self.pc = dst

-    def instruction_bltu(self, ins: 'LoadedInstruction'):
+    def instruction_bltu(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
        if rs1 < rs2:
            self.pc = dst

-    def instruction_bgeu(self, ins: 'LoadedInstruction'):
+    def instruction_bgeu(self, ins: 'Instruction'):
        rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
        if rs1 >= rs2:
            self.pc = dst

    # technically deprecated
-    def instruction_j(self, ins: 'LoadedInstruction'):
+    def instruction_j(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 1)
        addr = ins.get_imm(0)
        self.pc = addr

-    def instruction_jal(self, ins: 'LoadedInstruction'):
+    def instruction_jal(self, ins: 'Instruction'):
        reg = 'ra'  # default register is ra
        if len(ins.args) == 1:
            addr = ins.get_imm(0)
@ -273,29 +273,29 @@ class RV32I(InstructionSet):
        self.regs.set(reg, self.pc)
        self.pc = addr

-    def instruction_jalr(self, ins: 'LoadedInstruction'):
+    def instruction_jalr(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
        addr = ins.get_imm(1)
        self.regs.set(reg, self.pc)
        self.pc = addr

-    def instruction_ret(self, ins: 'LoadedInstruction'):
+    def instruction_ret(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 0)
        self.pc = self.regs.get('ra')

-    def instruction_ecall(self, ins: 'LoadedInstruction'):
+    def instruction_ecall(self, ins: 'Instruction'):
        self.instruction_scall(ins)

-    def instruction_ebreak(self, ins: 'LoadedInstruction'):
+    def instruction_ebreak(self, ins: 'Instruction'):
        self.instruction_sbreak(ins)

-    def instruction_scall(self, ins: 'LoadedInstruction'):
+    def instruction_scall(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 0)
        syscall = Syscall(self.regs.get('a7'), self.cpu)
        self.cpu.syscall_int.handle_syscall(syscall)

-    def instruction_sbreak(self, ins: 'LoadedInstruction'):
+    def instruction_sbreak(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 0)
        if self.cpu.active_debug:
            print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE)
@ -307,23 +307,23 @@ class RV32I(InstructionSet):
            "Debug instruction encountered at 0x{:08X}".format(self.pc - 1)
        )

-    def instruction_nop(self, ins: 'LoadedInstruction'):
+    def instruction_nop(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 0)
        pass

-    def instruction_li(self, ins: 'LoadedInstruction'):
+    def instruction_li(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
        immediate = ins.get_imm(1)
        self.regs.set(reg, immediate)

-    def instruction_la(self, ins: 'LoadedInstruction'):
+    def instruction_la(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 2)
        reg = ins.get_reg(0)
        immediate = ins.get_imm(1)
        self.regs.set(reg, immediate)

-    def instruction_mv(self, ins: 'LoadedInstruction'):
+    def instruction_mv(self, ins: 'Instruction'):
        ASSERT_LEN(ins.args, 2)
        rd, rs = ins.get_reg(0), ins.get_reg(1)
        self.regs.set(rd, self.regs.get(rs))
--- a/riscemu/instructions/RV32M.py
+++ b/riscemu/instructions/RV32M.py
@ -12,48 +12,48 @@ class RV32M(InstructionSet):
    """
    The RV32M Instruction set, containing multiplication and division instructions
    """
-    def instruction_mul(self, ins: 'LoadedInstruction'):
+    def instruction_mul(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 * rs2
        )

-    def instruction_mulh(self, ins: 'LoadedInstruction'):
+    def instruction_mulh(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            (rs1 * rs2) >> 32
        )

-    def instruction_mulhsu(self, ins: 'LoadedInstruction'):
+    def instruction_mulhsu(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_mulhu(self, ins: 'LoadedInstruction'):
+    def instruction_mulhu(self, ins: 'Instruction'):
        INS_NOT_IMPLEMENTED(ins)

-    def instruction_div(self, ins: 'LoadedInstruction'):
+    def instruction_div(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 // rs2
        )

-    def instruction_divu(self, ins: 'LoadedInstruction'):
+    def instruction_divu(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
        self.regs.set(
            rd,
            rs1 // rs2
        )

-    def instruction_rem(self, ins: 'LoadedInstruction'):
+    def instruction_rem(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
        self.regs.set(
            rd,
            rs1 % rs2
        )

-    def instruction_remu(self, ins: 'LoadedInstruction'):
+    def instruction_remu(self, ins: 'Instruction'):
        rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
        self.regs.set(
            rd,
--- a/riscemu/parser.py
+++ b/riscemu/parser.py
@ -3,15 +3,16 @@ RiscEmu (c) 2021 Anton Lydike

 SPDX-License-Identifier: MIT
 """
+import os
 import re
 from typing import Dict, Tuple, Iterable, Callable

-from helpers import Peekable
+from .helpers import Peekable
 from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
 from .base_types import Program
 from .colors import FMT_PARSE
 from .exceptions import ParseException
-from .tokenizer import Token, TokenType
+from .tokenizer import Token, TokenType, tokenize
 from .types import SimpleInstruction


@ -41,17 +42,32 @@ PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {


 def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
+    """
+    Convert a token stream into a parsed program.
+
+    :param name: the program's name
+    :param tokens_iter: the program's content, tokenized
+    :return: a parsed program
+    :raises ParseException: if a token's type has no entry in PARSERS
+    """
    context = ParseContext(name)

    for token, args in composite_tokenizer(Peekable[Token](tokens_iter)):
        if token.type not in PARSERS:
            raise ParseException("Unexpected token type: {}, {}".format(token, args))
        PARSERS[token.type](token, args, context)

    return context.finalize()


 def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]:
+    """
+    Convert an iterator over tokens into an iterator over tuples: (token, list(token))
+
+    The first token is either a pseudo_op, label, or instruction name. The token list holds all remaining tokens before
+    a newline is encountered
+    :param tokens_iter: An iterator over tokens
+    :return: An iterator over a slightly more structured representation of the tokens
+    """
    tokens: Peekable[Token] = Peekable[Token](tokens_iter)

    while not tokens.is_empty():
@ -75,5 +91,10 @@ def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
        elif tokens.peek().type == TokenType.NEWLINE:
            next(tokens)
            break
-        raise ParseException("Expected newline, instead got {}".format(tokens.peek()))
+        break
+        #raise ParseException("Expected newline, instead got {}".format(tokens.peek()))

+
+def parse_program_from_file(path: str) -> Program:
+    """
+    Tokenize and parse the file at the given path.
+
+    The resulting program is named after the last component of the path.
+
+    :param path: path of the file to read
+    :return: the parsed program
+    """
+    with open(path, 'r') as source:
+        program_name = os.path.basename(path)
+        return parse_tokens(program_name, tokenize(source))
--- a/riscemu/tokenizer.py
+++ b/riscemu/tokenizer.py
@ -7,7 +7,7 @@ SPDX-License-Identifier: MIT
 import re
 from dataclasses import dataclass
 from enum import Enum, auto
-from typing import List, Iterable
+from typing import List, Iterable, Optional
 from riscemu.decoder import RISCV_REGS

 from .exceptions import ParseException
@ -17,8 +17,6 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
 MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$')
 REGISTER_NAMES = RISCV_REGS

-I = lambda x: x
-

 class TokenType(Enum):
    COMMA = auto()
@ -39,7 +37,7 @@ class Token:
            return '\\n'
        if self.type == TokenType.COMMA:
            return ', '
-        return '{}({}) '.format(self.type.name[0:3], self.value)
+        return '{}({})'.format(self.type.name[0:3], self.value)


 NEWLINE = Token(TokenType.NEWLINE, '\n')
@ -55,7 +53,7 @@ def tokenize(input: Iterable[str]) -> Iterable[Token]:
        if not line:
            continue

-        parts = list(part for part in re.split(WHITESPACE_PATTERN, line) if part)
+        parts = list(part for part in split_whitespace_respecting_quotes(line) if part)

        yield from parse_line(parts)
        yield NEWLINE
@ -70,6 +68,8 @@ def parse_line(parts: List[str]) -> Iterable[Token]:
        yield Token(TokenType.PSEUDO_OP, first_token)
    elif first_token[-1] == ':':
        yield Token(TokenType.LABEL, first_token)
+        yield from parse_line(parts[1:])
+        return
    else:
        yield Token(TokenType.INSTRUCTION_NAME, first_token)

@ -100,3 +100,40 @@ def print_tokens(tokens: Iterable[Token]):
    for token in tokens:
        print(token, end='\n' if token == NEWLINE else '')
    print("", flush=True, end="")
+
+
+def split_whitespace_respecting_quotes(line: str) -> Iterable[str]:
+    quote = ""
+    part = ""
+    for c in line:
+        if c == quote:
+            yield part
+            part = ""
+            quote = ""
+            continue
+
+        if quote != "":
+            part += c
+            continue
+
+        if c in "\"'":
+            if part:
+                yield part
+            quote = c
+            part = ""
+            continue
+
+        if c in ' \t\n':
+            if part:
+                yield part
+            part = ""
+            continue
+
+        part += c
+
+    if part:
+        yield part
+
+
+
+
--- a/riscemu/types.py
+++ b/riscemu/types.py
@ -2,7 +2,7 @@ from typing import List, Tuple
 from .exceptions import MemoryAccessException
 from .helpers import parse_numeric_argument
 from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \
-    T_AbsoluteAddress
+    T_AbsoluteAddress, Program


 class SimpleInstruction(Instruction):
@ -26,13 +26,14 @@ class SimpleInstruction(Instruction):


 class InstructionMemorySection(MemorySection):
-    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, base: int = 0):
+    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: Program, base: int = 0):
        self.name = name
        self.base = base
        self.context = context
        self.size = len(instructions) * 4
        self.flags = MemoryFlags(True, True)
        self.instructions = instructions
+        self.owner = owner.name

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')
@ -47,13 +48,14 @@ class InstructionMemorySection(MemorySection):


 class BinaryDataMemorySection(MemorySection):
-    def __init__(self, data: bytearray, name: str, context: InstructionContext, base: int = 0):
+    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: Program, base: int = 0):
        self.name = name
        self.base = base
        self.context = context
        self.size = len(data)
        self.flags = MemoryFlags(False, False)
        self.data = data
+        self.owner = owner.name

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        if offset + size > self.size:
--- a/test/init.py
+++ b/test/init.py
@ -0,0 +1,2 @@
+from .test_tokenizer import *
+from .test_helpers import *
--- a/test/test_helpers.py
+++ b/test/test_helpers.py
@ -3,7 +3,7 @@ from unittest import TestCase
 from riscemu.helpers import *


-class Test(TestCase):
+class TestHelpers(TestCase):
    def test_int_to_bytes(self):
        self.assertEqual(int_to_bytes(-1), bytearray([0xff] * 4), "-1")
        self.assertEqual(int_to_bytes(1), bytearray([0, 0, 0, 1]), "1")
--- a/test/test_tokenizer.py
+++ b/test/test_tokenizer.py
@ -1,6 +1,7 @@
 from unittest import TestCase

-from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA
+from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA, \
+    split_whitespace_respecting_quotes


 def ins(name: str) -> Token:
@ -19,7 +20,7 @@ def lbl(name: str) -> Token:
    return Token(TokenType.LABEL, name)


-class Test(TestCase):
+class TestTokenizer(TestCase):

    def test_instructions(self):
        program = [
@ -79,3 +80,47 @@ section:

        self.assertEqual(list(tokenize(program.splitlines())), tokens)

+    def test_split_whitespace_respecting_quotes_single(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes("test")), ["test"]
+        )
+
+    def test_split_whitespace_respecting_quotes_empty(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes("")), []
+        )
+
+    def test_split_whitespace_respecting_quotes_two_parts(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes("test 123")), ["test", "123"]
+        )
+
+    def test_split_whitespace_respecting_quotes_whole_quoted(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes("'test 123'")), ["test 123"]
+        )
+
+    def test_split_whitespace_respecting_quotes_double_quotes(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes('"test 123"')), ["test 123"]
+        )
+
+    def test_split_whitespace_respecting_quotes_quoted_then_normal(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes('"test 123" abc')), ["test 123", "abc"]
+        )
+
+    def test_split_whitespace_respecting_quotes_quoted_sorrounded(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes('hello "test 123" abc')), ["hello", "test 123", "abc"]
+        )
+
+    def test_split_whitespace_respecting_quotes_weird_spaces(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes('hello  "test 123"\tabc')), ["hello", "test 123", "abc"]
+        )
+
+    def test_split_whitespace_respecting_quotes_quotes_no_spaces(self):
+        self.assertEqual(
+            list(split_whitespace_respecting_quotes('hello"test 123"abc')), ["hello", "test 123", "abc"]
+        )