diff --git a/.idea/misc.xml b/.idea/misc.xml
index d1e22ec..a15ea67 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff --git a/.idea/riscemu.iml b/.idea/riscemu.iml
index 8b8c395..8e5446a 100644
--- a/.idea/riscemu.iml
+++ b/.idea/riscemu.iml
@@ -1,7 +1,9 @@
-
+
+
+
diff --git a/riscemu/Exceptions.py b/riscemu/Exceptions.py
new file mode 100644
index 0000000..7bcb159
--- /dev/null
+++ b/riscemu/Exceptions.py
@@ -0,0 +1,33 @@
+class ParseException(Exception):  # parser error; derives from Exception (not BaseException) so ordinary handlers can catch it
+    def __init__(self, msg, data=None):  # msg: human-readable description; data: optional tuple of offending values
+        super().__init__(msg)  # fill in args so str(exc) and tracebacks actually show the message
+        self.msg = msg
+        self.data = data
+
+    def message(self):  # returns a repr-like string: ClassName("msg", data=...)
+        return "{}(\"{}\", data={})".format(self.__class__.__name__, self.msg, self.data)
+
+
+def ASSERT_EQ(a1, a2):  # assertion helper: raises ParseException when a1 != a2, otherwise returns None
+ if a1 != a2:
+ raise ParseException("ASSERTION_FAILED: Expected elements to be equal!", (a1, a2))  # both operands travel as the exception's data
+
+
+def ASSERT_LEN(a1, size):  # assertion helper: raises ParseException unless len(a1) == size
+ if len(a1) != size:
+ raise ParseException("ASSERTION_FAILED: Expected {} to be of length {}".format(a1, size), (a1, size))  # data is (a1, size)
+
+
+def ASSERT_NOT_NULL(a1):  # assertion helper: raises ParseException when a1 is None
+ if a1 is None:
+ raise ParseException("ASSERTION_FAILED: Expected {} to be non null".format(a1), (a1,))  # a1 is always None here, so the text reads "Expected None ..."
+
+
+def ASSERT_NOT_IN(a1, a2):  # assertion helper: raises ParseException when a1 is a member of a2
+ if a1 in a2:
+ raise ParseException("ASSERTION_FAILED: Expected {} to not be in {}".format(a1, a2), (a1,a2))  # data is (a1, a2)
+
+
+def ASSERT_IN(a1, a2):  # assertion helper: raises ParseException unless a1 is a member of a2
+    if a1 not in a2:
+        raise ParseException("ASSERTION_FAILED: Expected {} to be in {}".format(a1, a2), (a1, a2))  # message now matches the membership check
diff --git a/riscemu/Executable.py b/riscemu/Executable.py
new file mode 100644
index 0000000..94fb995
--- /dev/null
+++ b/riscemu/Executable.py
@@ -0,0 +1,33 @@
+from dataclasses import dataclass, field
+from typing import Dict, List, Tuple
+from . import MemoryFlags, RiscVInstructionToken, RiscVTokenizer, RiscVSymbolToken, RiscVPseudoOpToken
+from .Exceptions import *
+
+
+@dataclass
+class MemorySection:  # one named section of a program; its content is kept as a list of byte chunks
+ name: str
+ flags: MemoryFlags
+ size: int = 0  # running byte count, increased by add()
+ start: int = -1  # NOTE(review): -1 presumably means "no address assigned yet" - confirm against the loader
+ content: List[bytearray] = field(default_factory=list)  # chunks in the order they were added
+
+ def add(self, data: bytearray):  # append one chunk and grow size by len(data)
+ self.content.append(data)
+ self.size += len(data)
+
+
+@dataclass  # required: without it `insn` stays a class-level dataclasses.Field and add_insn() fails on .append
+class InstructionMemorySection(MemorySection):  # memory section that also records its instruction tokens
+    insn: List[RiscVInstructionToken] = field(default_factory=list)  # fresh list per instance
+    def add_insn(self, insn: RiscVInstructionToken):  # record one instruction token in this section
+        self.insn.append(insn)
+        self.size += 4  # every instruction is accounted for as 4 bytes
+
+
+@dataclass
+class Executable:  # plain record of a parsed program, as built by ExecutableParser.get_execuable()
+ run_ptr: Tuple[str, int]  # entry point as (section name, offset within that section)
+ sections: Dict[str, MemorySection]  # section name -> section
+ symbols: Dict[str, Tuple[str, int]]  # symbol name -> (section name, offset within that section)
+
diff --git a/riscemu/ExecutableParser.py b/riscemu/ExecutableParser.py
new file mode 100644
index 0000000..5eacdc2
--- /dev/null
+++ b/riscemu/ExecutableParser.py
@@ -0,0 +1,106 @@
+from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags
+from .Exceptions import *
+from .Tokenizer import RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, RiscVPseudoOpToken
+
+from typing import Dict, Tuple, List
+
+
+def parse_numeric_argument(arg: str):  # parse a decimal or 0x/0X-prefixed hex literal (optionally signed) into an int; ValueError if malformed
+    if arg.lstrip('+-').lower().startswith('0x'):  # look past a sign so "-0x10" is recognised as hex
+        return int(arg, 16)  # int() with base 16 accepts both the sign and the 0x prefix
+    return int(arg)
+
+class ExecutableParser:  # walks a tokenizer's token list and collects sections and symbols for an Executable
+    tokenizer: RiscVTokenizer
+
+    def __init__(self, tokenizer: RiscVTokenizer):  # tokenizer: its .tokens list is what parse() iterates
+        self.instructions: List[RiscVInstructionToken] = list()
+        self.symbols: Dict[str, Tuple[str, int]] = dict()  # symbol name -> (section name, offset in that section)
+        self.sections: Dict[str, MemorySection] = dict()  # section name -> section object
+        self.tokenizer = tokenizer
+        self.active_section = None  # name of the section currently being filled; None until one is opened
+        self.implicit_sections = False  # becomes True once 'text' had to be opened without a directive
+
+    def parse(self):  # dispatch each token by type; tokens of any other type are ignored
+        for token in self.tokenizer.tokens:
+            if isinstance(token, RiscVInstructionToken):
+                self.parse_instruction(token)
+            elif isinstance(token, RiscVSymbolToken):
+                self.handle_symbol(token)
+            elif isinstance(token, RiscVPseudoOpToken):
+                self.handle_pseudo_op(token)
+
+    def get_execuable(self):  # build the Executable; entry point is _start, else main, else ('text', 0)
+        start_ptr = ('text', 0)
+        if '_start' in self.symbols:
+            start_ptr = self.symbols['_start']
+        elif 'main' in self.symbols:
+            start_ptr = self.symbols['main']
+        return Executable(start_ptr, self.sections, self.symbols)
+
+    def parse_instruction(self, ins: RiscVInstructionToken):  # add ins to 'text'; raises ParseException in any other section
+        if self.active_section is None:  # no section opened yet: fall back to an implicit text section
+            self.op_text()
+            self.implicit_sections = True
+        ASSERT_EQ(self.active_section, 'text')
+        sec = self.curr_sec()
+        if not isinstance(sec, InstructionMemorySection):  # op_text() always creates this type, so this is a sanity check
+            raise ParseException("SHOULD NOT BE REACHED")
+        sec.add_insn(ins)
+
+    def handle_symbol(self, token: RiscVSymbolToken):  # record a label at the current section offset; duplicates raise ParseException
+        ASSERT_NOT_IN(token.name, self.symbols)
+        if self.active_section is None:  # label before any section: open implicit text instead of failing with KeyError(None)
+            self.op_text()
+            self.implicit_sections = True
+        self.symbols[token.name] = (self.active_section, self.curr_sec().size)
+
+    def handle_pseudo_op(self, op: RiscVPseudoOpToken):  # dispatch to the op_<name> method; unknown names raise ParseException
+        name = 'op_' + op.name
+        if hasattr(self, name):
+            getattr(self, name)(op)
+        else:
+            raise ParseException("Unknown pseudo op: {}".format(op), (op,))
+
+    ## Pseudo op implementations:
+    def op_section(self, op: RiscVPseudoOpToken):  # switch to the section named by the single argument
+        ASSERT_LEN(op.args, 1)
+        name = op.args[0][1:]  # drop the first character (presumably the leading '.')
+        ASSERT_IN(name, ('data', 'rodata', 'text'))
+        getattr(self, 'op_' + name)(op)
+
+    def op_text(self, op: RiscVPseudoOpToken = None):  # activate 'text' (read-only, executable, holds instructions)
+        self.set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection)
+
+    def op_data(self, op: RiscVPseudoOpToken = None):  # activate 'data' (writable, not executable)
+        self.set_sec('data', MemoryFlags(read_only=False, executable=False))
+
+    def op_rodata(self, op: RiscVPseudoOpToken = None):  # activate 'rodata' (read-only, not executable)
+        self.set_sec('rodata', MemoryFlags(read_only=True, executable=False))
+
+    def op_space(self, op: RiscVPseudoOpToken):  # reserve <size> zero bytes; only valid in data/rodata
+        ASSERT_IN(self.active_section, ('data', 'rodata'))
+        ASSERT_LEN(op.args, 1)
+        size = parse_numeric_argument(op.args[0])
+        self.curr_sec().add(bytearray(size))  # bytearray(n) yields n zero bytes
+
+    def op_ascii(self, op: RiscVPseudoOpToken):  # store the string argument as ASCII bytes; only valid in data/rodata
+        ASSERT_IN(self.active_section, ('data', 'rodata'))
+        ASSERT_LEN(op.args, 1)
+        text = op.args[0][1:-1]  # drop first and last character (presumably the surrounding quotes)
+        self.curr_sec().add(bytearray(text, 'ascii'))
+
+    def op_asciiz(self, op: RiscVPseudoOpToken):  # like op_ascii, but appends a terminating NUL byte
+        ASSERT_IN(self.active_section, ('data', 'rodata'))
+        ASSERT_LEN(op.args, 1)
+        text = op.args[0][1:-1]  # drop first and last character (presumably the surrounding quotes)
+        self.curr_sec().add(bytearray(text + '\0', 'ascii'))
+
+    ## Section handler code
+    def set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection):  # make `name` active, creating it with cls(name, flags) on first use
+        if name not in self.sections:
+            self.sections[name] = cls(name, flags)
+        self.active_section = name
+
+    def curr_sec(self):  # return the active section object; KeyError if no section is active
+        return self.sections[self.active_section]
diff --git a/riscemu/MMU.py b/riscemu/MMU.py
new file mode 100644
index 0000000..c8805b8
--- /dev/null
+++ b/riscemu/MMU.py
@@ -0,0 +1,15 @@
+from dataclasses import dataclass
+
+@dataclass(frozen=True)
+class MemoryFlags:  # immutable pair of access flags; frozen=True makes instances read-only after construction
+ read_only: bool
+ executable: bool
+
+class MemoryRegion:  # annotation-only declaration: no constructor and no default values are defined here
+ addr:int
+ len:int
+ flags: MemoryFlags
+
+
+class MMU:  # placeholder class; no behaviour is implemented yet
+    def __init__(self): pass  # explicit empty body: a bare `def` header at end of file is a syntax error and breaks `import riscemu`
diff --git a/riscemu/tokenizer.py b/riscemu/Tokenizer.py
similarity index 99%
rename from riscemu/tokenizer.py
rename to riscemu/Tokenizer.py
index 9d5e499..00c3d16 100644
--- a/riscemu/tokenizer.py
+++ b/riscemu/Tokenizer.py
@@ -231,7 +231,7 @@ class RiscVPseudoOpToken(RiscVToken):
class RiscVTokenizer:
def __init__(self, input: RiscVInput):
self.input = input
- self.tokens = []
+ self.tokens: List[RiscVToken] = []
def tokenize(self):
while self.input.has_next():
@@ -296,4 +296,3 @@ class RiscVTokenizer:
def parse_comment(self):
# just consume the rest
self.input.consume(regex=REG_UNTIL_NEWLINE)
-
diff --git a/riscemu/__init__.py b/riscemu/__init__.py
index 7db1ca4..da9547e 100644
--- a/riscemu/__init__.py
+++ b/riscemu/__init__.py
@@ -1,2 +1,10 @@
from .CPU import CPU, Registers, Syscall, SyscallInterface
-from .tokenizer import RiscVToken, RiscVInput, RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, RiscVPseudoOpToken
+
+from .Tokenizer import RiscVToken, RiscVInput, RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, \
+ RiscVPseudoOpToken, TokenType
+
+from .MMU import MemoryFlags, MemoryRegion, MMU
+
+from .Exceptions import ASSERT_NOT_NULL, ASSERT_LEN, ASSERT_IN, ASSERT_EQ, ASSERT_NOT_IN
+from .Executable import Executable
+from .ExecutableParser import ExecutableParser  # defined in ExecutableParser.py, not Executable.py
diff --git a/riscemu/main.py b/riscemu/main.py
index 96420ae..66dd644 100644
--- a/riscemu/main.py
+++ b/riscemu/main.py
@@ -1,3 +1,3 @@
from .CPU import *
-from .tokenizer import *
+from .Tokenizer import *
diff --git a/run.py b/run.py
index 85cda37..96c492a 100644
--- a/run.py
+++ b/run.py
@@ -26,5 +26,12 @@ loop:
tk = RiscVTokenizer(RiscVInput(example_progr))
tk.tokenize()
+ print("tokens:")
for token in tk.tokens:
print(token)
+
+ ep = ExecutableParser(tk)
+ ep.parse()
+
+ print(ep)
+