parsing of tokenized asm into MemorySections works
This commit is contained in:
parent
2cee60a17c
commit
da4ae7c4c1
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
@ -1,4 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (riscemu)" project-jdk-type="Python SDK" />
|
||||
</project>
|
4
.idea/riscemu.iml
generated
4
.idea/riscemu.iml
generated
@ -1,7 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
|
33
riscemu/Exceptions.py
Normal file
33
riscemu/Exceptions.py
Normal file
@ -0,0 +1,33 @@
|
||||
class ParseException(Exception):
    """Error raised when the parser rejects its input.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` handlers see it, as they do every other
    application-level error.

    Attributes:
        msg: Human readable description of the problem.
        data: Optional extra context (any object), ``None`` when not given.
    """

    def __init__(self, msg, data=None):
        # Hand the message to the base class so str(exc) and tracebacks
        # show it instead of an empty string.
        super().__init__(msg)
        self.msg = msg
        self.data = data

    def message(self):
        """Return ``ClassName("msg", data=...)`` as a single string."""
        return "{}(\"{}\", data={})".format(self.__class__.__name__, self.msg, self.data)
|
||||
|
||||
|
||||
def ASSERT_EQ(a1, a2):
    """Raise a ParseException carrying ``(a1, a2)`` when ``a1 != a2``."""
    differs = a1 != a2
    if not differs:
        return
    raise ParseException("ASSERTION_FAILED: Expected elements to be equal!", (a1, a2))
|
||||
|
||||
|
||||
def ASSERT_LEN(a1, size):
    """Raise a ParseException carrying ``(a1, size)`` when ``len(a1) != size``."""
    wrong_length = len(a1) != size
    if wrong_length:
        raise ParseException(
            "ASSERTION_FAILED: Expected {} to be of length {}".format(a1, size),
            (a1, size),
        )
|
||||
|
||||
|
||||
def ASSERT_NOT_NULL(a1):
    """Raise a ParseException carrying ``(a1,)`` when ``a1`` is ``None``."""
    if a1 is not None:
        return
    raise ParseException("ASSERTION_FAILED: Expected {} to be non null".format(a1), (a1,))
|
||||
|
||||
|
||||
def ASSERT_NOT_IN(a1, a2):
    """Raise a ParseException carrying ``(a1, a2)`` when ``a1`` is contained in ``a2``."""
    already_present = a1 in a2
    if already_present:
        raise ParseException(
            "ASSERTION_FAILED: Expected {} to not be in {}".format(a1, a2),
            (a1, a2),
        )
|
||||
|
||||
|
||||
def ASSERT_IN(a1, a2):
    """Raise a ParseException carrying ``(a1, a2)`` when ``a1`` is not contained in ``a2``."""
    if a1 not in a2:
        # The message states the expectation that failed: membership was
        # required here (the previous text said "to not be in").
        raise ParseException("ASSERTION_FAILED: Expected {} to be in {}".format(a1, a2), (a1, a2))
|
33
riscemu/Executable.py
Normal file
33
riscemu/Executable.py
Normal file
@ -0,0 +1,33 @@
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Tuple
|
||||
from . import MemoryFlags, RiscVInstructionToken, RiscVTokenizer, RiscVSymbolToken, RiscVPseudoOpToken
|
||||
from .Exceptions import *
|
||||
|
||||
|
||||
@dataclass
class MemorySection:
    """A named section that accumulates chunks of raw data.

    ``size`` is the running total of the lengths of all chunks passed to
    :meth:`add`.
    """
    name: str
    flags: MemoryFlags
    size: int = 0
    # NOTE(review): -1 presumably means "no start address assigned yet" - confirm.
    start: int = -1
    content: List[bytearray] = field(default_factory=list)

    def add(self, data: bytearray):
        """Append ``data`` as a new chunk and grow ``size`` by ``len(data)``."""
        self.content.append(data)
        self.size = self.size + len(data)
|
||||
|
||||
|
||||
# The decorator is required: without it ``insn`` stays a class-level
# ``dataclasses.Field`` object instead of becoming a per-instance list, and
# ``add_insn`` fails with AttributeError on ``.append``.
@dataclass
class InstructionMemorySection(MemorySection):
    """A memory section that additionally keeps a list of instruction tokens."""
    insn: List[RiscVInstructionToken] = field(default_factory=list)

    def add_insn(self, insn: RiscVInstructionToken):
        """Append one instruction token and grow ``size`` by 4."""
        self.insn.append(insn)
        # Every instruction is accounted for with a fixed size of 4
        # (presumably one 32-bit instruction word - confirm).
        self.size += 4
|
||||
|
||||
|
||||
@dataclass
class Executable:
    """Plain record bundling an entry pointer, the sections and the symbol table."""
    # Entry point as a (str, int) pair; the symbol table uses the same
    # pair type for its values.
    run_ptr: Tuple[str, int]
    # Sections keyed by a string (presumably the section name - confirm).
    sections: Dict[str, MemorySection]
    # Symbols keyed by a string, each mapped to a (str, int) pair.
    symbols: Dict[str, Tuple[str, int]]
|
||||
|
106
riscemu/ExecutableParser.py
Normal file
106
riscemu/ExecutableParser.py
Normal file
@ -0,0 +1,106 @@
|
||||
from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags
|
||||
from .Exceptions import *
|
||||
from .Tokenizer import RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, RiscVPseudoOpToken
|
||||
|
||||
from typing import Dict, Tuple, List
|
||||
|
||||
|
||||
def parse_numeric_argument(arg: str):
    """Convert a numeric assembler argument to an int.

    A ``0x``/``0X`` prefix selects base 16, optionally preceded by a sign
    (``-0x10`` -> -16); everything else is parsed as a decimal integer.

    Raises:
        ValueError: if ``arg`` is not a valid number in the selected base.
    """
    # Look past a leading sign so that signed hex literals are recognised;
    # int() itself accepts the sign followed by the 0x prefix.
    unsigned = arg.lstrip('+-')
    if unsigned.startswith(('0x', '0X')):
        return int(arg, 16)
    return int(arg)
|
||||
|
||||
class ExecutableParser:
    """Turn the tokens of a tokenizer into memory sections and a symbol table.

    Instruction tokens are appended to the ``text`` section, symbol tokens are
    recorded as ``(section name, offset)`` pairs and pseudo-op tokens are
    dispatched to the ``op_<name>`` method of this class.
    """
    tokenizer: RiscVTokenizer

    def __init__(self, tokenizer: RiscVTokenizer):
        """Create a parser reading from ``tokenizer.tokens``."""
        self.instructions: List[RiscVInstructionToken] = list()
        self.symbols: Dict[str, Tuple[str, int]] = dict()
        self.sections: Dict[str, MemorySection] = dict()
        self.tokenizer = tokenizer
        # Name of the section new content is added to; None until one is opened.
        self.active_section = None
        # Set once a text section had to be opened without an explicit directive.
        self.implicit_sections = False

    def parse(self):
        """Process every token of the tokenizer in order.

        Tokens that are neither instruction, symbol nor pseudo-op tokens are
        ignored.
        """
        for token in self.tokenizer.tokens:
            if isinstance(token, RiscVInstructionToken):
                self.parse_instruction(token)
            elif isinstance(token, RiscVSymbolToken):
                self.handle_symbol(token)
            elif isinstance(token, RiscVPseudoOpToken):
                self.handle_pseudo_op(token)

    def get_executable(self):
        """Build an ``Executable`` from the parsed sections and symbols.

        The entry pointer is the ``_start`` symbol if defined, else ``main``,
        else ``('text', 0)``.
        """
        start_ptr = ('text', 0)
        for entry in ('_start', 'main'):
            if entry in self.symbols:
                start_ptr = self.symbols[entry]
                break
        return Executable(start_ptr, self.sections, self.symbols)

    # Backward-compatible alias for the original, misspelled method name.
    get_execuable = get_executable

    def parse_instruction(self, ins: RiscVInstructionToken):
        """Append an instruction to the text section.

        Opens the text section implicitly when no section is active yet.

        Raises:
            ParseException: if the active section is not ``text``.
        """
        if self.active_section is None:
            self._open_implicit_text()

        ASSERT_EQ(self.active_section, 'text')
        sec = self.curr_sec()
        if isinstance(sec, InstructionMemorySection):
            sec.add_insn(ins)
        else:
            raise ParseException("SHOULD NOT BE REACHED")

    def handle_symbol(self, token: RiscVSymbolToken):
        """Record a symbol at the current end of the active section.

        Raises:
            ParseException: if a symbol with the same name already exists.
        """
        ASSERT_NOT_IN(token.name, self.symbols)
        if self.active_section is None:
            # A label may precede the first instruction of a file without
            # section directives; treat it like parse_instruction does
            # instead of failing with a KeyError in curr_sec().
            self._open_implicit_text()
        sec_pos = self.curr_sec().size
        self.symbols[token.name] = (self.active_section, sec_pos)

    def handle_pseudo_op(self, op: RiscVPseudoOpToken):
        """Dispatch a pseudo op to the method named ``op_<op.name>``.

        Raises:
            ParseException: if no such method exists.
        """
        handler = getattr(self, 'op_' + op.name, None)
        if handler is None:
            raise ParseException("Unknown pseudo op: {}".format(op), (op,))
        handler(op)

    ## Pseudo op implementations:
    def op_section(self, op: RiscVPseudoOpToken):
        """Switch to the section named by the single argument of the op.

        The first character of the argument is dropped (presumably the
        leading dot, e.g. ``.data`` -> ``data``); the remainder must be one of
        ``data``, ``rodata`` or ``text``.
        """
        ASSERT_LEN(op.args, 1)
        name = op.args[0][1:]
        ASSERT_IN(name, ('data', 'rodata', 'text'))
        getattr(self, 'op_' + name)(op)

    def op_text(self, op: RiscVPseudoOpToken = None):
        """Activate the read-only, executable ``text`` section."""
        self.set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection)

    def op_data(self, op: RiscVPseudoOpToken = None):
        """Activate the writable, non-executable ``data`` section."""
        self.set_sec('data', MemoryFlags(read_only=False, executable=False))

    def op_rodata(self, op: RiscVPseudoOpToken = None):
        """Activate the read-only, non-executable ``rodata`` section."""
        self.set_sec('rodata', MemoryFlags(read_only=True, executable=False))

    def op_space(self, op: RiscVPseudoOpToken):
        """Add a zero-filled chunk whose length is the single numeric argument.

        Only allowed while ``data`` or ``rodata`` is the active section.
        """
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        size = parse_numeric_argument(op.args[0])
        self.curr_sec().add(bytearray(size))

    def op_ascii(self, op: RiscVPseudoOpToken):
        """Add the ASCII bytes of the single argument, without a terminator.

        The first and last character of the argument are stripped (presumably
        the surrounding quotes). Only allowed in ``data`` or ``rodata``.
        """
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        text = op.args[0][1:-1]
        self.curr_sec().add(bytearray(text, 'ascii'))

    def op_asciiz(self, op: RiscVPseudoOpToken):
        """Like :meth:`op_ascii`, but a trailing NUL byte is appended."""
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        text = op.args[0][1:-1]
        self.curr_sec().add(bytearray(text + '\0', 'ascii'))

    ## Section handler code
    def set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection):
        """Make ``name`` the active section, creating it via ``cls`` on first use.

        ``flags`` and ``cls`` are only used when the section does not exist yet.
        """
        if name not in self.sections:
            self.sections[name] = cls(name, flags)
        self.active_section = name

    def curr_sec(self):
        """Return the active section object (KeyError when none is active)."""
        return self.sections[self.active_section]

    def _open_implicit_text(self):
        """Open the text section for input that has no section directive."""
        self.op_text()
        self.implicit_sections = True
|
15
riscemu/MMU.py
Normal file
15
riscemu/MMU.py
Normal file
@ -0,0 +1,15 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass(frozen=True)
class MemoryFlags:
    """Immutable pair of boolean flags attached to a piece of memory."""
    # True when the memory is flagged as read-only.
    read_only: bool
    # True when the memory is flagged as executable.
    executable: bool
|
||||
|
||||
class MemoryRegion:
    """Declares the attributes of a memory region; no behaviour is defined here.

    Only annotations are given, so no constructor or default values exist.
    """
    # NOTE(review): presumably the start address of the region - confirm.
    addr: int
    # NOTE(review): presumably the length of the region - confirm.
    len: int
    flags: MemoryFlags
|
||||
|
||||
|
||||
class MMU:
|
||||
def __init__(self):
|
@ -231,7 +231,7 @@ class RiscVPseudoOpToken(RiscVToken):
|
||||
class RiscVTokenizer:
|
||||
def __init__(self, input: RiscVInput):
|
||||
self.input = input
|
||||
self.tokens = []
|
||||
self.tokens: List[RiscVToken] = []
|
||||
|
||||
def tokenize(self):
|
||||
while self.input.has_next():
|
||||
@ -296,4 +296,3 @@ class RiscVTokenizer:
|
||||
def parse_comment(self):
|
||||
# just consume the rest
|
||||
self.input.consume(regex=REG_UNTIL_NEWLINE)
|
||||
|
@ -1,2 +1,10 @@
|
||||
from .CPU import CPU, Registers, Syscall, SyscallInterface
|
||||
from .tokenizer import RiscVToken, RiscVInput, RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, RiscVPseudoOpToken
|
||||
|
||||
from .Tokenizer import RiscVToken, RiscVInput, RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, \
|
||||
RiscVPseudoOpToken, TokenType
|
||||
|
||||
from .MMU import MemoryFlags, MemoryRegion, MMU
|
||||
|
||||
from .Exceptions import ASSERT_NOT_NULL, ASSERT_LEN, ASSERT_IN, ASSERT_EQ, ASSERT_NOT_IN
|
||||
|
||||
from .Executable import ExecutableParser, Executable
|
||||
|
@ -1,3 +1,3 @@
|
||||
from .CPU import *
|
||||
from .tokenizer import *
|
||||
from .Tokenizer import *
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user