[wip] almost done with the rework of the parser and internal data structure representation of programs

assembly-parser-rework
Anton Lydike 3 years ago
parent 84562de98f
commit dc4dca6fea

@ -9,12 +9,12 @@ on them.
import sys import sys
from typing import Tuple, List, Dict, Callable, Type from typing import Tuple, List, Dict, Callable, Type
from .Executable import MemoryFlags from .base_types import MemoryFlags
from .Syscall import SyscallInterface, get_syscall_symbols from .syscall import SyscallInterface, get_syscall_symbols
from .Exceptions import RiscemuBaseException, LaunchDebuggerException from .exceptions import RiscemuBaseException, LaunchDebuggerException
from .MMU import MMU from .MMU import MMU
from .Config import RunConfig from .config import RunConfig
from .Registers import Registers from .registers import Registers
from .debug import launch_debug_session from .debug import launch_debug_session
from .colors import FMT_CPU, FMT_NONE, FMT_ERROR from .colors import FMT_CPU, FMT_NONE, FMT_ERROR
@ -23,7 +23,7 @@ import riscemu
import typing import typing
if typing.TYPE_CHECKING: if typing.TYPE_CHECKING:
from . import Executable, LoadedExecutable, LoadedInstruction from . import base_types, LoadedExecutable, LoadedInstruction
from .instructions.InstructionSet import InstructionSet from .instructions.InstructionSet import InstructionSet
@ -78,7 +78,7 @@ class CPU:
""" """
return RiscVTokenizer(tokenizer_input, self.all_instructions()) return RiscVTokenizer(tokenizer_input, self.all_instructions())
def load(self, e: riscemu.Executable): def load(self, e: riscemu.base_types):
""" """
Load an executable into Memory Load an executable into Memory
""" """

@ -1,319 +0,0 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
This file holds Executable and LoadedExecutable classes as well as loading and some linking code.
FIXME: refactor this code into multiple files
"""
from dataclasses import dataclass, field
from typing import Dict, List, Tuple, Union, Optional
from .Exceptions import *
from .helpers import *
from math import log
import typing
if typing.TYPE_CHECKING:
from .Tokenizer import RiscVInstructionToken
@dataclass(frozen=True)
class MemoryFlags:
    """
    Immutable pair of access flags attached to a memory section.
    """
    read_only: bool
    executable: bool

    def __repr__(self):
        # compact form, e.g. MemoryFlags(ro,x) or MemoryFlags(rw,-)
        access = 'ro' if self.read_only else 'rw'
        execute = 'x' if self.executable else '-'
        return "{}({},{})".format(self.__class__.__name__, access, execute)
@dataclass
class MemorySection:
    """
    A not-yet-loaded section of an executable, collected chunk by chunk.

    ``content`` holds the individual chunks in insertion order and ``size``
    is kept in sync with the total number of bytes added through ``add``.
    """
    name: str
    flags: MemoryFlags
    size: int = 0
    content: List[bytearray] = field(default_factory=list)

    def add(self, data: bytearray):
        """
        Append a chunk of bytes to this section and grow ``size`` accordingly

        :param data: the bytes to append
        """
        self.content.append(data)
        self.size += len(data)

    def continuous_content(self, parent: 'LoadedExecutable'):
        """
        converts the content into one continuous bytearray

        :param parent: the executable this section is loaded for (unused for plain data)
        :return: a new bytearray holding all chunks back to back
        """
        if self.size == 0:
            return bytearray(0)
        # Build a fresh buffer: the previous implementation extended
        # self.content[0] in place, so the first stored chunk was corrupted
        # and calling this method twice duplicated the data.
        return bytearray().join(self.content)
@dataclass
class InstructionMemorySection(MemorySection):
    """
    A section whose content is a list of instruction tokens instead of bytes.
    Its size counts instructions, not bytes.
    """
    content: List['RiscVInstructionToken'] = field(default_factory=list)

    def add_insn(self, insn: 'RiscVInstructionToken'):
        """
        Append a single instruction token; every instruction counts as size 1
        """
        self.size += 1
        self.content.append(insn)

    def continuous_content(self, parent: 'LoadedExecutable'):
        """
        Wrap every stored token in a LoadedInstruction bound to ``parent``

        :param parent: the loaded executable the instructions belong to
        :return: list of LoadedInstruction, in program order
        """
        loaded = []
        for token in self.content:
            loaded.append(LoadedInstruction(token.instruction, token.args, parent))
        return loaded
@dataclass
class Executable:
    """
    The parsed, not yet loaded form of a program.

    ``run_ptr`` and the values of ``symbols`` are (section name, offset) pairs.
    """
    run_ptr: Tuple[str, int]
    sections: Dict[str, MemorySection]
    symbols: Dict[str, Tuple[str, int]]
    exported_symbols: List[str]
    name: str

    def __repr__(self):
        section_names = " ".join(self.sections.keys())
        symbol_names = " ".join(self.symbols.keys())
        exported = ",".join(self.exported_symbols)
        template = "{}(sections = {}, symbols = {}, run_ptr = {}, globals={})"
        return template.format(
            self.__class__.__name__, section_names, symbol_names, self.run_ptr, exported
        )
### LOADING CODE
@dataclass(frozen=True)
class LoadedInstruction:
    """
    An instruction which is loaded into memory. It keeps a reference to the
    binary it belongs to so that symbols in its arguments can be resolved
    """
    name: str
    args: List[str]
    bin: 'LoadedExecutable'

    def _raw_arg(self, num: int):
        """
        Return the unparsed argument at position ``num``

        :raises ParseException: if the instruction has no argument at that position
        """
        if num >= len(self.args):
            raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args))
        return self.args[num]

    def get_imm(self, num: int):
        """
        parse and get immediate argument; symbols known to the binary take
        precedence over numeric parsing
        """
        raw = self._raw_arg(num)
        if not self.bin.has_symb(raw):
            return parse_numeric_argument(raw)
        return self.bin.lookup_symbol(raw)

    def get_imm_reg(self, num: int):
        """
        parse and get an argument of the form imm(reg)

        :return: a tuple (immediate value, register name)
        """
        raw = self._raw_arg(num)
        ASSERT_IN("(", raw)
        # drop the trailing ")" and split "imm(reg" at the opening parenthesis
        imm, reg = raw[:-1].split("(")
        if not self.bin.has_symb(imm):
            return parse_numeric_argument(imm), reg
        return self.bin.lookup_symbol(imm), reg

    def get_reg(self, num: int):
        """
        parse and get a register argument (returned as the raw string)
        """
        return self._raw_arg(num)

    def __repr__(self):
        joined = ", ".join(self.args)
        return "{} {}".format(self.name, joined)
@dataclass(frozen=True)
class LoadedMemorySection:
    """
    A section which is loaded into memory

    ``content`` is either a list of LoadedInstruction (executable sections) or
    a bytearray (data sections). All offsets are relative to ``base``.
    """
    name: str
    base: int
    size: int
    content: Union[List[LoadedInstruction], bytearray] = field(repr=False)
    flags: MemoryFlags
    owner: str

    def read(self, offset: int, size: int):
        """
        Read ``size`` elements starting at ``offset``

        :raises MemoryAccessException: if the range is not fully inside the section
        """
        if offset < 0:
            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, size, 'read')
        if offset + size > self.size:
            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
                                        size, 'read')
        return self.content[offset: offset + size]

    def read_instruction(self, offset):
        """
        Read the single instruction stored at ``offset``

        :raises MemoryAccessException: if the section is not executable or the offset is out of range
        """
        if not self.flags.executable:
            raise MemoryAccessException('Section not executable!', self.base + offset, 1, 'read exec')
        if offset < 0:
            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'read exec')
        if offset >= self.size:
            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
                                        1, 'read exec')
        return self.content[offset]

    def write(self, offset, size, data):
        """
        Write the first ``size`` elements of ``data`` to the section at ``offset``

        :raises MemoryAccessException: if the section is read only or the range
            is not fully inside the section
        """
        if self.flags.read_only:
            raise MemoryAccessException('Section not writeable {}'.format(self.name), self.base + offset, size, 'write')
        if offset < 0:
            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, size, 'write')
        # Validate the whole range up front (same rule as read()). Checking only
        # the start offset let an overlong write modify part of the section
        # before failing with an IndexError.
        if offset + size > self.size:
            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
                                        size, 'write')
        for i in range(size):
            self.content[offset + i] = data[i]

    def dump(self, at_addr=None, fmt='hex', max_rows=10, group=4, bytes_per_row=16, all=False):
        """
        Print a view of this section to stdout

        :param at_addr: absolute address to centre the view on (and highlight); defaults to the section base
        :param fmt: format passed through to format_bytes
        :param max_rows: maximum number of rows (or instructions) printed
        :param group: group size passed through to format_bytes
        :param bytes_per_row: number of bytes per printed row (data sections only)
        :param all: print the whole section instead of a window
        """
        highlight = -1
        if at_addr is None:
            at_addr = self.base
        else:
            highlight = at_addr - self.base

        at_off = at_addr - self.base
        start = max(align_addr(at_off - ((max_rows * bytes_per_row) // 2), 8) - 8, 0)
        if all:
            end = self.size
            start = 0
        else:
            end = min(start + (max_rows * bytes_per_row), self.size)

        # address column width in hex digits; max(.., 1) avoids log(0) for an
        # empty section located at address 0
        fmt_str = " 0x{:0" + str(ceil(log(max(self.base + end, 1), 16))) + "X}: {}"

        if self.flags.executable:
            # this section holds instructions!
            start = 0 if all else max(at_off - (max_rows // 2), 0)
            end = self.size if all else min(self.size, start + max_rows)
            print(FMT_MEM + "{}, viewing {} instructions:".format(
                self, end - start
            ) + FMT_NONE)
            for i in range(start, end):
                if i == highlight:
                    ins = FMT_UNDERLINE + FMT_ORANGE + repr(self.content[i]) + FMT_NONE
                else:
                    ins = repr(self.content[i])
                print(fmt_str.format(self.base + i, ins))
        else:
            print(FMT_MEM + "{}, viewing {} bytes:".format(
                self, end - start
            ) + FMT_NONE)
            for i in range(0, end - start, bytes_per_row):
                row_start = start + i
                data = self.content[row_start: min(row_start + bytes_per_row, end)]
                # exclusive upper bound: an address at the start of the next row
                # belongs to that row only
                if row_start <= highlight < row_start + bytes_per_row:
                    hi_ind = (highlight - row_start) // group
                    print(fmt_str.format(self.base + row_start, format_bytes(data, fmt, group, highlight=hi_ind)))
                else:
                    print(fmt_str.format(self.base + row_start, format_bytes(data, fmt, group)))
        if end == self.size:
            print(FMT_MEM + "End of section!" + FMT_NONE)
        else:
            print(FMT_MEM + "More bytes ..." + FMT_NONE)

    def __repr__(self):
        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
            self.__class__.__name__,
            self.name,
            self.base,
            self.size,
            self.flags,
            self.owner
        )
class LoadedExecutable:
    """
    An executable that has been placed in memory starting at base_addr.

    Constructing it lays out all sections of the executable one after another
    (each start aligned via align_addr), translates section-relative symbols
    and the run pointer into absolute addresses and records exported symbols.

    The symbol table is kept as a Python dict and is not part of the
    emulated memory.
    """
    name: str
    base_addr: int
    sections_by_name: Dict[str, LoadedMemorySection]
    sections: List[LoadedMemorySection]
    symbols: Dict[str, int]
    run_ptr: int
    exported_symbols: Dict[str, int]
    global_symbol_table: Dict[str, int]

    def __init__(self, exe: Executable, base_addr: int, global_symbol_table: Dict[str, int]):
        self.name = exe.name
        self.base_addr = base_addr
        self.sections = []
        self.sections_by_name = {}
        self.symbols = {}
        self.exported_symbols = {}
        self.global_symbol_table = global_symbol_table

        # place sections back to back, aligning the start of each following one
        next_free = base_addr
        for section in exe.sections.values():
            loaded = LoadedMemorySection(
                section.name, next_free, section.size,
                section.continuous_content(self), section.flags, self.name
            )
            self.sections.append(loaded)
            self.sections_by_name[loaded.name] = loaded
            next_free = align_addr(loaded.size + next_free)

        # '_static_' symbols already carry their final value, all others are
        # offsets into the section they were defined in
        for symbol, (section_name, offset) in exe.symbols.items():
            if section_name != '_static_':
                ASSERT_IN(section_name, self.sections_by_name)
                self.symbols[symbol] = self.sections_by_name[section_name].base + offset
            else:
                self.symbols[symbol] = offset

        for exported in exe.exported_symbols:
            self.exported_symbols[exported] = self.symbols[exported]

        self.size = next_free - base_addr

        # translate run_ptr from executable
        entry_section, entry_offset = exe.run_ptr
        self.run_ptr = self.sections_by_name[entry_section].base + entry_offset

    def lookup_symbol(self, name):
        """
        Resolve a symbol, preferring local symbols over the global table

        :raises LinkerException: if the symbol is known to neither table
        """
        for table in (self.symbols, self.global_symbol_table):
            if name in table:
                return table[name]
        raise LinkerException('Symbol {} not found!'.format(name), (self,))

    def __repr__(self):
        section_names = " ".join(self.sections_by_name.keys())
        return '{}[{}](base=0x{:08X}, size={}bytes, sections={}, run_ptr=0x{:08X})'.format(
            self.__class__.__name__, self.name, self.base_addr, self.size, section_names, self.run_ptr
        )

    def has_symb(self, arg):
        """
        Tell whether lookup_symbol would succeed for ``arg``
        """
        if arg in self.symbols:
            return True
        return arg in self.global_symbol_table

@ -1,193 +0,0 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
This file holds the parser that parses the tokenizer output.
"""
from .helpers import parse_numeric_argument, int_to_bytes
from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags
from .Exceptions import *
from .Tokenizer import tokenize, TokenType, Token, COMMA, NEWLINE
from typing import Dict, Tuple, List, Optional
class ExecutableParser:
    """
    Parses output from the RiscVTokenizer into an Executable
    """
    tokenizer: 'RiscVTokenizer'

    def __init__(self, tokenizer: 'RiscVTokenizer'):
        self.instructions: List['RiscVInstructionToken'] = list()
        self.symbols: Dict[str, Tuple[str, int]] = dict()
        self.sections: Dict[str, MemorySection] = dict()
        self.tokenizer = tokenizer
        self.active_section: Optional[str] = None
        self.implicit_sections = False
        self.globals: List[str] = list()

    def parse(self) -> Executable:
        """
        parse tokenizer output into an executable

        :return: the parsed executable
        :raise ParseException: Raises a ParseException when invalid input is read
        """
        # The token classes are not imported into this module, and
        # isinstance(token, 'SomeName') raises TypeError because the second
        # argument must be a class. Dispatch on the class names found in the
        # token's MRO instead, which keeps isinstance-like subclass matching.
        handlers = {
            'RiscVInstructionToken': self.parse_instruction,
            'RiscVSymbolToken': self.handle_symbol,
            'RiscVPseudoOpToken': self.handle_pseudo_op,
        }
        for token in self.tokenizer.tokens:
            for klass in type(token).__mro__:
                handler = handlers.get(klass.__name__)
                if handler is not None:
                    handler(token)
                    break
            # tokens of any other kind are ignored
        return self._get_execuable()

    def _get_execuable(self) -> Executable:
        """
        Build the Executable from the collected state. The entry point is
        '_start' if defined, else 'main', else the beginning of the text section
        """
        start_ptr = ('text', 0)
        if '_start' in self.symbols:
            start_ptr = self.symbols['_start']
        elif 'main' in self.symbols:
            start_ptr = self.symbols['main']
        return Executable(start_ptr, self.sections, self.symbols, self.globals, self.tokenizer.name)

    def parse_instruction(self, ins: 'RiscVInstructionToken') -> None:
        """
        parses an Instruction token

        :param ins: the instruction token
        """
        if self.active_section is None:
            # instructions before any section directive implicitly open .text
            self.op_text()
            self.implicit_sections = True

        ASSERT_EQ(self.active_section, 'text')
        sec = self._curr_sec()
        if isinstance(sec, InstructionMemorySection):
            sec.add_insn(ins)
        else:
            raise ParseException("SHOULD NOT BE REACHED")

    def handle_symbol(self, token: 'RiscVSymbolToken'):
        """
        Handle a symbol token (such as 'main:')

        The symbol is recorded at the current end of the active section.

        :param token: the symbol token
        """
        ASSERT_NOT_IN(token.name, self.symbols)
        ASSERT_NOT_NULL(self.active_section)
        sec_pos = self._curr_sec().size
        self.symbols[token.name] = (self.active_section, sec_pos)

    def handle_pseudo_op(self, op: 'RiscVPseudoOpToken'):
        """
        Handle a pseudo op token (such as '.word 0xffaabbcc') by dispatching
        to the method named 'op_' + the op name

        :param op: the pseudo-op token
        :raise ParseException: if no handler exists for this op
        """
        handler = getattr(self, 'op_' + op.name, None)
        if handler is None:
            raise ParseException("Unknown pseudo op: {}".format(op), (op,))
        handler(op)

    ## Pseudo op implementations:
    def op_section(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .section token

        :param op: The token
        """
        ASSERT_LEN(op.args, 1)
        # strip the first character of the argument (e.g. '.data' -> 'data')
        name = op.args[0][1:]
        ASSERT_IN(name, ('data', 'rodata', 'text'))
        getattr(self, 'op_' + name)(op)

    def op_text(self, op: 'RiscVPseudoOpToken' = None):
        """
        handles a .text token

        :param op: The token
        """
        self._set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection)

    def op_data(self, op: 'RiscVPseudoOpToken' = None):
        """
        handles a .data token

        :param op: The token
        """
        self._set_sec('data', MemoryFlags(read_only=False, executable=False))

    def op_rodata(self, op: 'RiscVPseudoOpToken' = None):
        """
        handles a .rodata token

        :param op: The token
        """
        self._set_sec('rodata', MemoryFlags(read_only=True, executable=False))

    def op_space(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .space token. Inserts empty space into the current (data or rodata) section

        :param op: The token
        """
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        size = parse_numeric_argument(op.args[0])
        self._curr_sec().add(bytearray(size))

    def op_ascii(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .ascii token. Inserts ascii encoded text into the current data section

        :param op: The token
        """
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        # drop the first and last character of the argument, then resolve escape sequences
        text = op.args[0][1:-1].encode('ascii').decode('unicode_escape')
        self._curr_sec().add(bytearray(text, 'ascii'))

    def op_asciiz(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .asciiz token. Inserts null-terminated ascii encoded text into the current data section

        :param op: The token
        """
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        text = op.args[0][1:-1].encode('ascii').decode('unicode_escape')
        self._curr_sec().add(bytearray(text + '\0', 'ascii'))

    def op_global(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .global token. Marks the named symbol as global

        :param op: The token
        """
        ASSERT_LEN(op.args, 1)
        name = op.args[0]
        self.globals.append(name)

    def op_set(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .set name, val token. Sets the symbol name to val

        :param op: The token
        """
        ASSERT_LEN(op.args, 2)
        name = op.args[0]
        val = parse_numeric_argument(op.args[1])
        # '_static_' marks a symbol whose value is final and not section-relative
        self.symbols[name] = ('_static_', val)

    def op_align(self, op: 'RiscVPseudoOpToken'):
        """
        handles an align token. Currently a no-op (not fully implemented yet, as the linker handles most
        alignment tasks)

        :param op: The token
        """
        pass

    def op_word(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .word token. Appends the value as 4 bytes to the current section

        :param op: The token
        """
        ASSERT_LEN(op.args, 1)
        val = parse_numeric_argument(op.args[0])
        self._curr_sec().add(int_to_bytes(val, 4))

    ## Section handler code
    def _set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection):
        """
        Make ``name`` the active section, creating it with ``cls`` and ``flags`` on first use
        """
        if name not in self.sections:
            self.sections[name] = cls(name, flags)
        self.active_section = name

    def _curr_sec(self):
        """
        Return the section object of the active section
        """
        return self.sections[self.active_section]

@ -4,10 +4,10 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT SPDX-License-Identifier: MIT
""" """
from .Config import RunConfig from .base_types import InstructionContext, Instruction, MemorySection, MemoryFlags, T_RelativeAddress, T_AbsoluteAddress, \
from .Executable import Executable, LoadedExecutable, LoadedMemorySection, LoadedInstruction, MemoryFlags Program
from .helpers import align_addr, int_from_bytes from .helpers import align_addr, int_from_bytes
from .Exceptions import OutOfMemoryException, InvalidAllocationException from .exceptions import OutOfMemoryException, InvalidAllocationException
from .colors import * from .colors import *
from typing import Dict, List, Tuple, Optional from typing import Dict, List, Tuple, Optional
@ -27,19 +27,14 @@ class MMU:
No single allocation can be bigger than 64 MB No single allocation can be bigger than 64 MB
""" """
sections: List[LoadedMemorySection] sections: List[MemorySection]
""" """
A list of all loaded memory sections A list of all loaded memory sections
""" """
binaries: List[LoadedExecutable] programs: List[Program]
""" """
A list of all loaded executables A list of all loaded programs
"""
last_bin: Optional[LoadedExecutable] = None
"""
The last loaded executable (the next executable is inserted directly after this one)
""" """
global_symbols: Dict[str, int] global_symbols: Dict[str, int]
@ -47,79 +42,14 @@ class MMU:
The global symbol table The global symbol table
""" """
last_ins_sec: Optional[LoadedMemorySection] def __init__(self):
def __init__(self, conf: RunConfig):
"""
Create a new MMU, respecting the active RunConfiguration
:param conf: The config to respect
"""
self.sections: List[LoadedMemorySection] = list()
self.binaries: List[LoadedExecutable] = list()
self.first_free_addr: int = 0x100
self.conf: RunConfig = conf
self.global_symbols: Dict[str, int] = dict()
self.last_ins_sec = None
def load_bin(self, exe: Executable) -> LoadedExecutable:
"""
Load an executable into memory
:param exe: the executable to load
:return: A LoadedExecutable
:raises OutOfMemoryException: When all memory is used
"""
# align to 8 byte word
addr = align_addr(self.first_free_addr)
loaded_bin = LoadedExecutable(exe, addr, self.global_symbols)
if loaded_bin.size + addr > self.max_size:
raise OutOfMemoryException('load of executable')
self.binaries.append(loaded_bin)
self.first_free_addr = loaded_bin.base_addr + loaded_bin.size
# read sections into sec dict
for sec in loaded_bin.sections:
self.sections.append(sec)
self.global_symbols.update(loaded_bin.exported_symbols)
print(FMT_MEM + "[MMU] Successfully loaded{}: {}".format(FMT_NONE, loaded_bin))
return loaded_bin
def allocate_section(self, name: str, req_size: int, flag: MemoryFlags):
""" """
Used to allocate a memory region (data only). Use `load_bin` if you want to load a binary, this is used for Create a new MMU
stack and maybe malloc in the future.
:param name: Name of the section to allocate
:param req_size: The requested size
:param flag: The flags protecting this memory section
:return: The LoadedMemorySection
""" """
if flag.executable: self.sections = list()
raise InvalidAllocationException('cannot allocate executable section', name, req_size, flag) self.global_symbols = dict()
if req_size < 0:
raise InvalidAllocationException('Invalid size request', name, req_size, flag)
if req_size > self.max_alloc_size: def get_sec_containing(self, addr: T_AbsoluteAddress) -> Optional[MemorySection]:
raise InvalidAllocationException('Cannot allocate more than {} bytes at a time'.format(self.max_alloc_size),
name, req_size, flag)
base = align_addr(self.first_free_addr)
size = align_addr(req_size)
sec = LoadedMemorySection(name, base, size, bytearray(size), flag, "<runtime>")
self.sections.append(sec)
self.first_free_addr = base + size
return sec
def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]:
""" """
Returns the section that contains the address addr Returns the section that contains the address addr
@ -131,29 +61,25 @@ class MMU:
return sec return sec
return None return None
def get_bin_containing(self, addr: int) -> Optional[LoadedExecutable]: def get_bin_containing(self, addr: T_AbsoluteAddress) -> Optional[Program]:
for exe in self.binaries: for exe in self.binaries:
if exe.base_addr <= addr < exe.base_addr + exe.size: if exe.base_addr <= addr < exe.base_addr + exe.size:
return exe return exe
return None return None
def read_ins(self, addr: int) -> LoadedInstruction: def read_ins(self, addr: T_AbsoluteAddress) -> Instruction:
""" """
Read a single instruction located at addr Read a single instruction located at addr
:param addr: The location :param addr: The location
:return: The Instruction :return: The Instruction
""" """
sec = self.last_ins_sec
if sec is not None and sec.base <= addr < sec.base + sec.size:
return sec.read_instruction(addr - sec.base)
sec = self.get_sec_containing(addr) sec = self.get_sec_containing(addr)
self.last_ins_sec = sec
if sec is None: if sec is None:
print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! " print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! "
"Have you forgotten an exit syscall or ret statement?" + FMT_NONE) "Have you forgotten an exit syscall or ret statement?" + FMT_NONE)
raise RuntimeError("No next instruction available!") raise RuntimeError("No next instruction available!")
return sec.read_instruction(addr - sec.base) return sec.read_ins(addr - sec.base)
def read(self, addr: int, size: int) -> bytearray: def read(self, addr: int, size: int) -> bytearray:
""" """
@ -164,6 +90,9 @@ class MMU:
:return: The bytearray at addr :return: The bytearray at addr
""" """
sec = self.get_sec_containing(addr) sec = self.get_sec_containing(addr)
if sec is None:
print(FMT_MEM + "[MMU] Trying to read data form invalid region at 0x{:x}! ".format(addr) + FMT_NONE)
raise RuntimeError("Reading from uninitialized memory region!")
return sec.read(addr - sec.base, size) return sec.read(addr - sec.base, size)
def write(self, addr: int, size: int, data): def write(self, addr: int, size: int, data):
@ -176,7 +105,7 @@ class MMU:
""" """
sec = self.get_sec_containing(addr) sec = self.get_sec_containing(addr)
if sec is None: if sec is None:
print(FMT_MEM + '[MMU] Invalid write into non-initialized section at 0x{:08X}'.format(addr) + FMT_NONE) print(FMT_MEM + '[MMU] Invalid write into non-initialized region at 0x{:08X}'.format(addr) + FMT_NONE)
raise RuntimeError("No write pls") raise RuntimeError("No write pls")
return sec.write(addr - sec.base, size, data) return sec.write(addr - sec.base, size, data)
@ -195,7 +124,7 @@ class MMU:
return return
sec.dump(addr, *args, **kwargs) sec.dump(addr, *args, **kwargs)
def symbol(self, symb: str): def label(self, symb: str):
""" """
Look up the symbol symb in all local symbol tables (and the global one) Look up the symbol symb in all local symbol tables (and the global one)
@ -204,9 +133,9 @@ class MMU:
print(FMT_MEM + "[MMU] Lookup for symbol {}:".format(symb) + FMT_NONE) print(FMT_MEM + "[MMU] Lookup for symbol {}:".format(symb) + FMT_NONE)
if symb in self.global_symbols: if symb in self.global_symbols:
print(" Found global symbol {}: 0x{:X}".format(symb, self.global_symbols[symb])) print(" Found global symbol {}: 0x{:X}".format(symb, self.global_symbols[symb]))
for b in self.binaries: for section in self.sections:
if symb in b.symbols: if symb in section.context.labels:
print(" Found local symbol {}: 0x{:X} in {}".format(symb, b.symbols[symb], b.name)) print(" Found local labels {}: 0x{:X} in {}".format(symb, section.context.labels[symb], section.name))
def read_int(self, addr: int) -> int: def read_int(self, addr: int) -> int:
return int_from_bytes(self.read(addr, 4)) return int_from_bytes(self.read(addr, 4))

@ -8,21 +8,19 @@ This package aims at providing an all-round usable RISC-V emulator and debugger
It contains everything needed to run assembly files, so you don't need any custom compilers or toolchains It contains everything needed to run assembly files, so you don't need any custom compilers or toolchains
""" """
from .Exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ from .exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \
ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException
from .Executable import Executable, LoadedExecutable, LoadedMemorySection from .base_types import Executable, LoadedExecutable, LoadedMemorySection
from .ExecutableParser import ExecutableParser
from .instructions import * from .instructions import *
from .MMU import MMU from .MMU import MMU
from .Registers import Registers from .registers import Registers
from .Syscall import SyscallInterface, Syscall from .syscall import SyscallInterface, Syscall
from .CPU import CPU from .CPU import CPU
from .Config import RunConfig from .config import RunConfig
__author__ = "Anton Lydike <Anton@Lydike.com>" __author__ = "Anton Lydike <Anton@Lydike.com>"
__copyright__ = "Copyright 2021 Anton Lydike" __copyright__ = "Copyright 2021 Anton Lydike"

@ -0,0 +1,169 @@
from typing import Optional, Tuple, Union
from enum import Enum, auto
from typing import Optional, Tuple, Union
from helpers import parse_numeric_argument
from .base_types import Program, T_RelativeAddress, InstructionContext
from .colors import FMT_PARSE, FMT_NONE
from .exceptions import ParseException
from .helpers import ASSERT_LEN
from .tokenizer import Token
from .types import BinaryDataMemorySection, InstructionMemorySection
# Base names of sections that hold instructions; op_section treats every other section as binary data.
INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini')
class MemorySectionType(Enum):
    """
    The two kinds of section the parser can currently be building.
    """
    # section content is raw bytes
    Data = auto()
    # section content is a list of instructions
    Instructions = auto()
class CurrentSection:
    """
    The section that is currently being filled while parsing.
    """
    name: str
    data: Union[list, bytearray]
    type: MemorySectionType

    def current_address(self) -> T_RelativeAddress:
        """
        Section-relative address of the next element that will be added:
        one per byte in data sections, four per entry otherwise
        """
        unit = 1 if self.type == MemorySectionType.Data else 4
        return len(self.data) * unit

    def __repr__(self):
        template = "{}(name={},data={},type={})"
        return template.format(self.__class__.__name__, self.name, self.data, self.type.name)
class ParseContext:
    """
    Mutable state of one parser run: the program being built, its
    instruction context and the section currently being filled (if any).
    """
    section: Optional[CurrentSection]
    context: InstructionContext
    program: Program

    def __init__(self, name: str):
        program = Program(name)
        self.program = program
        self.context = program.context
        self.section = None

    def finalize(self) -> Program:
        """
        Finish the section in progress and hand out the program
        """
        self.finalize_section()
        return self.program

    def finalize_section(self):
        """
        Convert the section in progress (if any) into a memory section of the
        matching kind and add it to the program
        """
        current = self.section
        if current is None:
            return

        if current.type == MemorySectionType.Data:
            self.program.add_section(
                BinaryDataMemorySection(current.data, current.name, self.context)
            )
        elif current.type == MemorySectionType.Instructions:
            self.program.add_section(
                InstructionMemorySection(current.data, current.name, self.context)
            )

    def __repr__(self):
        template = "{}(\n\tsetion={},\n\tprogram={}\n)"
        return template.format(self.__class__.__name__, self.section, self.program)
def ASSERT_IN_SECTION_TYPE(context: ParseContext, type: MemorySectionType):
    """
    Ensure the parser is currently inside a section of the given type

    :param context: the parse context to inspect
    :param type: the required section type
    :raises ParseException: if there is no current section or it has another type
    """
    current = context.section
    if current is None:
        raise ParseException('Error, expected to be in {} section, but no section is present...'.format(type.name))
    if current.type == type:
        return
    raise ParseException(
        'Error, expected to be in {} section, but currently in {}...'.format(type.name, current)
    )
def get_section_base_name(section_name: str) -> str:
    """
    Reduce a dotted section name to its first component,
    e.g. '.text.startup' becomes '.text'
    """
    # index 1 is the component right after the first dot
    pieces = section_name.split('.', 2)
    return '.' + pieces[1]
class AssemblerDirectives:
    """
    This class represents a collection of all assembler directives as documented by
    https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md#pseudo-ops

    All class methods prefixed with op_ are directly used as assembler directives.
    """

    @classmethod
    def op_align(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Pad the current data section with zero bytes until its current
        address is a multiple of the given value
        """
        ASSERT_LEN(args, 1)
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
        align_to = parse_numeric_argument(args[0])
        current_mod = context.section.current_address() % align_to
        if current_mod == 0:
            return
        context.section.data += bytearray(align_to - current_mod)

    @classmethod
    def op_section(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Finish the section in progress and start a new, empty one named args[0].
        Sections whose base name is in INSTRUCTION_SECTION_NAMES hold
        instructions, all others hold binary data
        """
        ASSERT_LEN(args, 1)
        context.finalize_section()

        # Always start from a fresh CurrentSection: context.section is None
        # before the first directive, and mutating the previous object would
        # touch state that was just handed over to the finalized section.
        section = CurrentSection()
        section.name = args[0]
        if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES:
            section.type = MemorySectionType.Instructions
            section.data = list()
        else:
            section.type = MemorySectionType.Data
            section.data = bytearray()
        context.section = section

    @classmethod
    def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Mark the label args[0] as global
        """
        ASSERT_LEN(args, 1)
        context.program.global_labels.add(args[0])

    @classmethod
    def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Define the label args[0] with the numeric value args[1]
        """
        ASSERT_LEN(args, 2)
        name = args[0]
        value = parse_numeric_argument(args[1])
        context.context.labels[name] = value

    @classmethod
    def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Append args[0] zero bytes to the current data section
        """
        ASSERT_LEN(args, 1)
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
        size = parse_numeric_argument(args[0])
        cls.add_bytes(size, bytearray(size), context)

    @classmethod
    def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext):
        """
        Append data to the current data section

        :param size: number of bytes to add (used for None and int content)
        :param content: None for zero bytes, an int to be encoded in ``size``
            bytes, or the raw bytes themselves
        :param context: the active parse context
        """
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)

        if content is None:
            content = bytearray(size)
        if isinstance(content, int):
            # same helper the previous parser used for .word; imported locally
            # because this module only imports ASSERT_LEN from .helpers
            from .helpers import int_to_bytes
            content = int_to_bytes(content, size)

        # previously the prepared bytes were dropped instead of being stored
        context.section.data += content

    @classmethod
    def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True):
        """
        Append ascii encoded text to the current data section

        :param zero_terminate: append a single zero byte after the text
        """
        encoded_bytes = bytearray(text.encode('ascii'))
        if zero_terminate:
            encoded_bytes += bytearray(1)
        cls.add_bytes(len(encoded_bytes), encoded_bytes, context)

    @classmethod
    def handle_instruction(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Dispatch an assembler directive token. token.value is the directive
        including its leading character (e.g. '.word')
        """
        op = token.value[1:]
        if hasattr(cls, 'op_' + op):
            getattr(cls, 'op_' + op)(token, args, context)
        elif op in ('text', 'data', 'rodata', 'bss', 'sbss'):
            # shorthand for ".section <name>"
            cls.op_section(token, (token.value,), context)
        elif op in ('string', 'asciiz', 'asciz', 'ascii'):
            ASSERT_LEN(args, 1)
            # only plain .ascii is stored without the terminating zero byte
            cls.add_text(args[0], context, op != 'ascii')
        elif op in DATA_OP_SIZES:
            size = DATA_OP_SIZES[op]
            for arg in args:
                cls.add_bytes(size, parse_numeric_argument(arg), context)
        else:
            print(FMT_PARSE + "Unknown assembler directive: {} {} in {}".format(token, args, context) + FMT_NONE)
# Maps data directive names (without the leading dot) to the number of bytes each argument occupies.
# Looked up by AssemblerDirectives.handle_instruction at call time.
DATA_OP_SIZES = {
'byte': 1,
'2byte': 2, 'half': 2, 'short': 2,
'4byte': 4, 'word': 4, 'long': 4,
'8byte': 8, 'dword': 8, 'quad': 8,
}

@ -0,0 +1,186 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
This file contains base classes which represent loaded programs
"""
import re
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Set

from .helpers import *
# Both address kinds are plain ints; the aliases only document whether a value
# is relative to some block or absolute.
T_RelativeAddress = int
T_AbsoluteAddress = int
# Matches references to numbered labels: one or more digits followed by a
# direction suffix, 'f' or 'b' (see InstructionContext.resolve_label).
NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
@dataclass(frozen=True)
class MemoryFlags:
    """
    Immutable pair of access flags attached to a memory section.
    """
    read_only: bool
    executable: bool

    def __repr__(self):
        # compact form, e.g. MemoryFlags(ro,x) or MemoryFlags(rw,-)
        access = 'ro' if self.read_only else 'rw'
        execute = 'x' if self.executable else '-'
        return "{}({},{})".format(self.__class__.__name__, access, execute)
class InstructionContext:
    """
    Label information shared by the instructions of one block
    """
    base_address: T_AbsoluteAddress
    """
    The address where the instruction block is placed
    """

    labels: Dict[str, T_RelativeAddress]
    """
    This dictionary maps all labels to their relative position of the instruction block
    """

    numbered_labels: Dict[str, List[T_RelativeAddress]]
    """
    This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where
    the label was placed
    """

    def __init__(self):
        self.labels = dict()
        self.numbered_labels = defaultdict(list)
        self.base_address = 0

    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_RelativeAddress]:
        """
        Look up the block-relative address of a label

        Plain labels are looked up by name. Numbered references (digits plus
        'b' or 'f') resolve to the closest placement of that number before
        ('b') or after (otherwise) ``address_at``.

        :param symbol: label name or numbered reference
        :param address_at: position of the reference, required for numbered references
        :return: the relative address, or None if nothing matches
        """
        if not NUMBER_SYMBOL_PATTERN.match(symbol):
            return self.labels.get(symbol, None)

        if address_at is None:
            raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))

        # .get() instead of indexing so the defaultdict does not grow on lookups
        placements = self.numbered_labels.get(symbol[:-1], [])
        if symbol[-1] == 'b':
            return max((addr for addr in placements if addr < address_at), default=None)
        return min((addr for addr in placements if addr > address_at), default=None)
class Instruction(ABC):
    """
    Interface of a single instruction: a name plus its arguments, with
    accessors that parse individual arguments
    """
    name: str
    args: tuple

    @abstractmethod
    def get_imm(self, num: int) -> int:
        """
        parse and get immediate argument
        """

    @abstractmethod
    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        parse and get an argument imm(reg)
        """

    @abstractmethod
    def get_reg(self, num: int) -> str:
        """
        parse and get a register argument
        """

    def __repr__(self):
        joined_args = ", ".join(self.args)
        return "{} {}".format(self.name, joined_args)
@dataclass
class MemorySection(ABC):
    """
    Abstract base for a contiguous region of memory that belongs to a program.

    Concrete sections implement raw byte access (read/write) and instruction
    fetch (read_ins). All offsets passed to these methods are relative to the
    start of the section.
    """
    # name of the section
    name: str
    # access flags (read-only / executable)
    flags: MemoryFlags
    # size of the section in bytes
    size: int
    # absolute address of the first byte of the section
    base: T_AbsoluteAddress
    # identifier of whoever owns this section (printed in __repr__)
    owner: str
    # label context associated with this section
    context: InstructionContext

    @abstractmethod
    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Read ``size`` bytes starting at the section-relative ``offset``
        """
        pass

    @abstractmethod
    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Write ``size`` bytes of ``data`` at the section-relative ``offset``
        """
        pass

    @abstractmethod
    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Fetch the instruction located at the section-relative ``offset``
        """
        pass

    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress], fmt: str = 'hex',
             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
        """
        Print the contents of this section to stdout.

        Executable sections are printed one instruction (4 bytes) per row, all
        other sections as rows of formatted bytes.

        :param start: section-relative address to start at; if ``end`` is None this is the address the window is
                      centred on (and which gets highlighted)
        :param end: section-relative end address (exclusive), or None to print ``rows`` rows around ``start``
        :param fmt: byte format handed to format_bytes (ignored for executable sections)
        :param bytes_per_row: bytes printed per row (forced to 4 for executable sections)
        :param rows: number of rows printed when ``end`` is None
        :param group: byte grouping handed to format_bytes
        """
        if self.flags.executable:
            bytes_per_row = 4

        highlight = None
        if end is None:
            half_window = bytes_per_row * (rows // 2)
            end = start + half_window
            highlight = start
            start = start - half_window

        # Keep the window inside the section: a window centred close to either
        # end would otherwise ask read()/read_ins() for offsets outside of it.
        start = max(start, 0)
        end = min(end, self.size)

        if self.flags.executable:
            print(FMT_MEM + "{}, viewing {} instructions:".format(
                self, (end - start) // 4
            ) + FMT_NONE)

            for addr in range(start, end, 4):
                if addr == highlight:
                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
                print("0x{:x}: {}{}".format(
                    self.base + addr, self.read_ins(addr), FMT_NONE
                ))
        else:
            print(FMT_MEM + "{}, viewing {} bytes:".format(
                self, (end - start)
            ) + FMT_NONE)

            for addr in range(start, end, bytes_per_row):
                # With an explicit end there is nothing to highlight. The old code
                # evaluated ``None - addr`` here and crashed with a TypeError.
                # NOTE(review): -1 is assumed to mean "no highlight" for
                # format_bytes (it already receives -1 for rows just past the
                # highlighted one) - confirm against helpers.format_bytes.
                hi_ind = -1 if highlight is None else (highlight - addr) // group
                # the last row may be shorter than bytes_per_row
                row_len = min(bytes_per_row, end - addr)
                print("0x{:x}: {}{}".format(
                    self.base + addr, format_bytes(self.read(addr, row_len), fmt, group, hi_ind), FMT_NONE
                ))

    def __repr__(self):
        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
            self.__class__.__name__,
            self.name,
            self.base,
            self.size,
            self.flags,
            self.owner
        )
class Program:
    """
    A parsed program: a named collection of memory sections plus the label
    context and the set of labels that were declared global.
    """
    name: str
    context: InstructionContext
    global_labels: Set[str]
    sections: List[MemorySection]
    base: T_AbsoluteAddress = 0

    def __init__(self, name: str, base: int = 0):
        self.name, self.base = name, base
        self.context = InstructionContext()
        self.global_labels = set()
        self.sections = list()

    def add_section(self, sec: MemorySection):
        """
        Append a section to this program
        """
        self.sections.append(sec)

    def __repr__(self):
        # sections are listed by name only to keep the output short
        section_names = [section.name for section in self.sections]
        template = "{}(name={},context={},globals={},sections={},base={})"
        return template.format(
            self.__class__.__name__, self.name, self.context, self.global_labels,
            section_names, self.base
        )

@ -10,7 +10,7 @@ from typing import Optional
@dataclass(frozen=True, init=True) @dataclass(frozen=True, init=True)
class RunConfig: class RunConfig:
stack_size: int = 8 * 1024 * 64 # for 8KB stack stack_size: int = 8 * 1024 * 64 # for 8KB stack
include_scall_symbols: bool = True include_scall_symbols: bool = True
add_accept_imm: bool = False add_accept_imm: bool = False
# debugging # debugging
@ -21,3 +21,5 @@ class RunConfig:
scall_fs: bool = False scall_fs: bool = False
verbosity: int = 0 verbosity: int = 0
CONFIG = RunConfig()

@ -5,9 +5,9 @@ SPDX-License-Identifier: MIT
""" """
import typing import typing
from .Registers import Registers from .registers import Registers
from .colors import FMT_DEBUG, FMT_NONE from .colors import FMT_DEBUG, FMT_NONE
from .Executable import LoadedInstruction from .base_types import Instruction
from .helpers import * from .helpers import *
if typing.TYPE_CHECKING: if typing.TYPE_CHECKING:
@ -50,7 +50,7 @@ def launch_debug_session(cpu: 'CPU', mmu: 'MMU', reg: 'Registers', prompt=""):
return return
bin = mmu.get_bin_containing(cpu.pc) bin = mmu.get_bin_containing(cpu.pc)
ins = LoadedInstruction(name, list(args), bin) ins = Instruction(name, list(args), bin)
print(FMT_DEBUG + "Running instruction " + ins + FMT_NONE) print(FMT_DEBUG + "Running instruction " + ins + FMT_NONE)
cpu.run_instruction(ins) cpu.run_instruction(ins)

@ -7,11 +7,9 @@ SPDX-License-Identifier: MIT
import typing import typing
from abc import abstractmethod from abc import abstractmethod
from .base_types import Instruction
from .colors import * from .colors import *
if typing.TYPE_CHECKING:
from .Executable import LoadedInstruction
class RiscemuBaseException(BaseException): class RiscemuBaseException(BaseException):
@abstractmethod @abstractmethod
@ -116,7 +114,7 @@ class InvalidAllocationException(RiscemuBaseException):
class UnimplementedInstruction(RiscemuBaseException): class UnimplementedInstruction(RiscemuBaseException):
def __init__(self, ins: 'LoadedInstruction'): def __init__(self, ins: Instruction):
self.ins = ins self.ins = ins
def message(self): def message(self):

@ -5,7 +5,8 @@ SPDX-License-Identifier: MIT
""" """
from math import log10, ceil from math import log10, ceil
from .Exceptions import * from .exceptions import *
from typing import Iterable, Iterator, TypeVar, Generic, List
def align_addr(addr: int, to_bytes: int = 8) -> int: def align_addr(addr: int, to_bytes: int = 8) -> int:
@ -105,3 +106,36 @@ def bind_twos_complement(val):
elif val > 2147483647: elif val > 2147483647:
return val - 4294967296 return val - 4294967296
return val return val
T = TypeVar('T')


class Peekable(Generic[T], Iterator[T]):
    """
    Iterator wrapper that allows looking at the next item without consuming
    it, and pushing items back in front of the stream.

    Note: ``None`` doubles as the "exhausted" marker of peek(), so an
    underlying iterable that yields ``None`` looks empty to peek()/is_empty().
    """

    def __init__(self, iterable: Iterable[T]):
        self.iterable = iter(iterable)
        # items already pulled from (or pushed back in front of) the underlying
        # iterator; index 0 is always the next item to be returned
        self.cache: List[T] = list()

    def __iter__(self) -> Iterator[T]:
        return self

    def __next__(self) -> T:
        """
        Return the next item, preferring cached/pushed back items.

        :raises StopIteration: when cache and underlying iterator are exhausted
        """
        if self.cache:
            # Take from the FRONT: peek() reads index 0 and push_back() prepends,
            # so popping from the end would hand out items in the wrong order
            # as soon as more than one item is cached.
            return self.cache.pop(0)
        return next(self.iterable)

    def peek(self) -> T:
        """
        Return the next item without consuming it, or None if there is none.
        """
        if self.cache:
            return self.cache[0]
        try:
            upcoming = next(self.iterable)
        except StopIteration:
            return None
        self.cache.append(upcoming)
        return upcoming

    def push_back(self, item: T):
        """
        Put ``item`` in front of the stream; it is the next item returned.
        """
        self.cache = [item] + self.cache

    def is_empty(self) -> bool:
        """
        True if there is no further item (see the class note about None items).
        """
        return self.peek() is None

@ -9,7 +9,7 @@ from typing import Tuple, Callable, Dict
from abc import ABC from abc import ABC
from ..CPU import CPU from ..CPU import CPU
from ..helpers import ASSERT_LEN, ASSERT_IN, to_unsigned from ..helpers import ASSERT_LEN, ASSERT_IN, to_unsigned
from ..Executable import LoadedInstruction from ..base_types import LoadedInstruction
class InstructionSet(ABC): class InstructionSet(ABC):

@ -1,5 +1,5 @@
from .InstructionSet import InstructionSet, LoadedInstruction from .InstructionSet import InstructionSet, LoadedInstruction
from ..Exceptions import INS_NOT_IMPLEMENTED from ..exceptions import INS_NOT_IMPLEMENTED
from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed

@ -9,9 +9,9 @@ from .InstructionSet import *
from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed
from ..colors import FMT_DEBUG, FMT_NONE from ..colors import FMT_DEBUG, FMT_NONE
from ..debug import launch_debug_session from ..debug import launch_debug_session
from ..Exceptions import LaunchDebuggerException from ..exceptions import LaunchDebuggerException
from ..Syscall import Syscall from ..syscall import Syscall
from ..Executable import LoadedInstruction from ..base_types import LoadedInstruction
class RV32I(InstructionSet): class RV32I(InstructionSet):

@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT
""" """
from .InstructionSet import * from .InstructionSet import *
from ..Exceptions import INS_NOT_IMPLEMENTED from ..exceptions import INS_NOT_IMPLEMENTED
class RV32M(InstructionSet): class RV32M(InstructionSet):

@ -0,0 +1,79 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import re
from typing import Dict, Tuple, Iterable, Callable
from helpers import Peekable
from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
from .base_types import Program
from .colors import FMT_PARSE
from .exceptions import ParseException
from .tokenizer import Token, TokenType
from .types import SimpleInstruction
def parse_instruction(token: Token, args: Tuple[str], context: ParseContext):
    """
    Append the instruction described by ``token`` and ``args`` to the current section.

    :param token: the instruction name token
    :param args: the (unparsed) arguments of the instruction
    :param context: the parse context holding the current section
    :raises ParseException: if there is no current section, or it is not an instruction section
    """
    section = context.section
    inside_instructions = section is not None and section.type == MemorySectionType.Instructions
    if not inside_instructions:
        raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context))

    # the instruction remembers its own section-relative address
    instruction = SimpleInstruction(token.value, args, context.context, section.current_address())
    section.data.append(instruction)
def parse_label(token: Token, args: Tuple[str], context: ParseContext):
    """
    Register the label described by ``token`` at the current address of the current section.

    Purely numeric labels (e.g. ``1:``) may be defined several times, every
    placement is recorded. All other labels map to a single address; defining
    one twice prints a warning and the later definition wins.

    :param token: the label token, its value ends with a colon
    :param args: arguments following the label (only used for error reporting)
    :param context: the parse context holding the current section and label tables
    :raises ParseException: if the label is encountered outside of any section
    """
    # only FMT_PARSE is imported at module level, so fetch the reset code here
    from .colors import FMT_NONE

    if context.section is None:
        # same error as parse_instruction, instead of an AttributeError on None
        raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context))

    # strip the trailing colon
    name = token.value[:-1]
    address = context.section.current_address()

    if re.match(r'^\d+$', name):
        # relative label:
        context.context.numbered_labels[name].append(address)
        return

    if name in context.context.labels:
        # reset the colour afterwards so it does not bleed into later output
        print(FMT_PARSE + 'Warn: Symbol {} defined twice!'.format(name) + FMT_NONE)
    context.context.labels[name] = address
# Dispatch table used by parse_tokens: maps the type of the token that starts
# a statement to the handler called with (token, args, context).
PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {
    TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction,
    TokenType.LABEL: parse_label,
    TokenType.INSTRUCTION_NAME: parse_instruction
}
def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
    """
    Build a Program from a stream of tokens.

    :param name: name handed to the ParseContext
    :param tokens_iter: the tokens to parse
    :return: the result of finalizing the parse context
    :raises ParseException: if a statement starts with a token type that has no handler in PARSERS
    """
    context = ParseContext(name)
    token_stream = Peekable[Token](tokens_iter)

    for token, args in composite_tokenizer(token_stream):
        handler = PARSERS.get(token.type)
        if handler is None:
            raise ParseException("Unexpected token type: {}, {}".format(token, args))
        handler(token, args, context)

    return context.finalize()
def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]:
    """
    Group a flat token stream into (statement token, arguments) pairs.

    A statement starts with a pseudo op, label or instruction name token. Its
    arguments are collected by take_arguments. Any other token found at the
    start of a statement is dropped.

    :param tokens_iter: the tokens to group
    """
    stream: Peekable[Token] = Peekable[Token](tokens_iter)
    statement_starts = (TokenType.PSEUDO_OP, TokenType.LABEL, TokenType.INSTRUCTION_NAME)

    while True:
        if stream.is_empty():
            break
        head = next(stream)
        if head.type not in statement_starts:
            continue
        yield head, tuple(take_arguments(stream))
def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
    """
    Consumes arguments and the commas separating them and yields argument.value until newline is reached.
    The newline at the end is consumed.

    The stream may also end without a final newline, in which case the arguments simply end there.

    If the very first token is neither an argument, a comma nor a newline, it is left in the stream and no
    arguments are yielded. NOTE(review): this assumes a statement without arguments can be followed directly
    by the next statement (e.g. a label in front of an instruction) - confirm against the tokenizer.

    :param tokens: A Peekable iterator over some Tokens
    :raises ParseException: if, after at least one argument or comma, a token other than an argument,
                            a comma or a newline is found
    """
    consumed_any = False
    while True:
        upcoming = tokens.peek()
        if upcoming is None:
            # peek() returns None once the stream is exhausted
            return

        if upcoming.type == TokenType.ARGUMENT:
            # An argument may directly follow another argument. SimpleInstruction.get_imm_reg
            # reads an imm(reg) operand from two neighbouring argument slots.
            yield next(tokens).value
        elif upcoming.type == TokenType.COMMA:
            next(tokens)
        elif upcoming.type == TokenType.NEWLINE:
            next(tokens)
            return
        elif consumed_any:
            # The old code raised unconditionally here, which rejected valid argument lists.
            raise ParseException("Expected newline, instead got {}".format(upcoming))
        else:
            # nothing belonging to this statement was consumed: leave the token for the caller
            return

        consumed_any = True

@ -2,8 +2,8 @@ from dataclasses import dataclass
from typing import List, Dict, Tuple from typing import List, Dict, Tuple
from .Exceptions import * from .Exceptions import *
from ..Exceptions import RiscemuBaseException from ..exceptions import RiscemuBaseException
from ..Executable import MemoryFlags, LoadedMemorySection from ..base_types import MemoryFlags, LoadedMemorySection
from ..decoder import decode, RISCV_REGS, format_ins from ..decoder import decode, RISCV_REGS, format_ins
from ..helpers import FMT_PARSE, FMT_NONE, FMT_GREEN, FMT_BOLD from ..helpers import FMT_PARSE, FMT_NONE, FMT_GREEN, FMT_BOLD

@ -8,8 +8,8 @@ from typing import Dict, List, Optional, TYPE_CHECKING
from .ElfLoader import ElfInstruction, ElfLoadedMemorySection, InstructionAccessFault, InstructionAddressMisalignedTrap from .ElfLoader import ElfInstruction, ElfLoadedMemorySection, InstructionAccessFault, InstructionAddressMisalignedTrap
from .PrivMMU import PrivMMU from .PrivMMU import PrivMMU
from ..Config import RunConfig from ..config import RunConfig
from ..Executable import LoadedMemorySection, MemoryFlags from ..base_types import LoadedMemorySection, MemoryFlags
from ..IO.IOModule import IOModule from ..IO.IOModule import IOModule
from ..colors import FMT_ERROR, FMT_NONE, FMT_MEM from ..colors import FMT_ERROR, FMT_NONE, FMT_MEM
from ..decoder import decode from ..decoder import decode
@ -117,7 +117,7 @@ class MemoryImageMMU(PrivMMU):
return "{}{:+x} ({}:{})".format(sym, addr - val, sec.owner, sec.name) return "{}{:+x} ({}:{})".format(sym, addr - val, sec.owner, sec.name)
return "{}:{}{:+x}".format(sec.owner, sec.name, addr - sec.base) return "{}:{}{:+x}".format(sec.owner, sec.name, addr - sec.base)
def symbol(self, symb: str): def label(self, symb: str):
print(FMT_MEM + "Looking up symbol {}".format(symb)) print(FMT_MEM + "Looking up symbol {}".format(symb))
for owner, symbs in self.debug_info['symbols'].items(): for owner, symbs in self.debug_info['symbols'].items():
if symb in symbs: if symb in symbs:

@ -15,7 +15,7 @@ from ..IO import TextIO
from ..instructions import RV32A, RV32M from ..instructions import RV32A, RV32M
if typing.TYPE_CHECKING: if typing.TYPE_CHECKING:
from riscemu import Executable, LoadedExecutable, LoadedInstruction from riscemu import base_types, LoadedExecutable, LoadedInstruction
from riscemu.instructions.InstructionSet import InstructionSet from riscemu.instructions.InstructionSet import InstructionSet
@ -95,7 +95,7 @@ class PrivCPU(CPU):
print() print()
print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE) print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE)
def load(self, e: riscemu.Executable): def load(self, e: riscemu.base_types):
raise NotImplementedError("Not supported!") raise NotImplementedError("Not supported!")
def run_loaded(self, le: 'riscemu.LoadedExecutable'): def run_loaded(self, le: 'riscemu.LoadedExecutable'):

@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT
""" """
from ..instructions.RV32I import * from ..instructions.RV32I import *
from ..Exceptions import INS_NOT_IMPLEMENTED from ..exceptions import INS_NOT_IMPLEMENTED
from .Exceptions import * from .Exceptions import *
from .privmodes import PrivModes from .privmodes import PrivModes
from ..colors import FMT_CPU, FMT_NONE from ..colors import FMT_CPU, FMT_NONE

@ -4,10 +4,10 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT SPDX-License-Identifier: MIT
""" """
from .Config import RunConfig from .config import RunConfig
from .helpers import * from .helpers import *
from collections import defaultdict from collections import defaultdict
from .Exceptions import InvalidRegisterException from .exceptions import InvalidRegisterException
class Registers: class Registers:
""" """

@ -10,15 +10,16 @@ from enum import Enum, auto
from typing import List, Iterable from typing import List, Iterable
from riscemu.decoder import RISCV_REGS from riscemu.decoder import RISCV_REGS
from .Exceptions import ParseException from .exceptions import ParseException
LINE_COMMENT_STARTERS = ('#', ';', '//') LINE_COMMENT_STARTERS = ('#', ';', '//')
WHITESPACE_PATTERN = re.compile(r'\s+') WHITESPACE_PATTERN = re.compile(r'\s+')
MEMORY_ADDRESS_PATTERN = re.compile('^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$') MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$')
REGISTER_NAMES = RISCV_REGS REGISTER_NAMES = RISCV_REGS
I = lambda x: x I = lambda x: x
class TokenType(Enum): class TokenType(Enum):
COMMA = auto() COMMA = auto()
ARGUMENT = auto() ARGUMENT = auto()
@ -40,6 +41,7 @@ class Token:
return ', ' return ', '
return '{}({}) '.format(self.type.name[0:3], self.value) return '{}({}) '.format(self.type.name[0:3], self.value)
NEWLINE = Token(TokenType.NEWLINE, '\n') NEWLINE = Token(TokenType.NEWLINE, '\n')
COMMA = Token(TokenType.COMMA, ',') COMMA = Token(TokenType.COMMA, ',')

@ -0,0 +1,72 @@
from typing import List, Tuple
from .exceptions import MemoryAccessException
from .helpers import parse_numeric_argument
from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \
T_AbsoluteAddress
class SimpleInstruction(Instruction):
    """
    Instruction whose arguments are kept as the strings found in the source.

    Arguments are only interpreted when they are requested.
    """

    def __init__(self, name: str, args: Tuple[str], context: InstructionContext, addr: T_RelativeAddress):
        self.name = name
        self.args = args
        self.context = context
        self.addr = addr

    def get_imm(self, num: int) -> int:
        """
        Return argument ``num`` as an integer.

        The argument is first tried as a label (resolved relative to this
        instruction's address); only if no such label is known it is parsed as
        a number.
        """
        raw = self.args[num]
        label_address = self.context.resolve_label(raw, self.addr)
        # compare against None explicitly, address 0 is a valid result
        return parse_numeric_argument(raw) if label_address is None else label_address

    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        Return an imm(reg) argument as (immediate, register): the register is
        read from argument ``num``, the immediate from argument ``num + 1``.
        """
        immediate = self.get_imm(num + 1)
        register = self.get_reg(num)
        return immediate, register

    def get_reg(self, num: int) -> str:
        """
        Return argument ``num`` unchanged (the register name).
        """
        return self.args[num]
class InstructionMemorySection(MemorySection):
    """
    Read-only, executable section holding a list of instructions.

    Every instruction occupies 4 bytes of address space. Raw byte access is
    not supported; instructions can only be fetched with read_ins.
    """

    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, base: int = 0,
                 owner: str = ''):
        """
        :param instructions: the instructions of this section, in address order
        :param name: name of the section
        :param context: label context of the instructions
        :param base: absolute address of the section
        :param owner: owner of the section; optional to stay compatible with existing callers
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(instructions) * 4
        self.flags = MemoryFlags(True, True)
        self.instructions = instructions
        # MemorySection.__repr__ reads self.owner. Without this assignment,
        # printing the section (as dump() does) raised AttributeError.
        self.owner = owner

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Not supported.

        :raises MemoryAccessException: always
        """
        raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Not supported.

        :raises MemoryAccessException: always
        """
        raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write')

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Fetch the instruction at the section-relative ``offset``.

        :raises MemoryAccessException: if ``offset`` is not 4-byte aligned or lies outside of the section
        """
        if offset % 4 != 0:
            raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')
        if not 0 <= offset < self.size:
            # A negative offset would silently index from the end of the list,
            # and a too large one would surface as a bare IndexError.
            raise MemoryAccessException("Out of bounds access in {}".format(self), self.base + offset, 4,
                                        'instruction fetch')
        return self.instructions[offset // 4]
class BinaryDataMemorySection(MemorySection):
    """
    Writable, non-executable section backed by a bytearray.
    """

    def __init__(self, data: bytearray, name: str, context: InstructionContext, base: int = 0, owner: str = ''):
        """
        :param data: the initial contents; its length is the size of the section
        :param name: name of the section
        :param context: label context associated with the section
        :param base: absolute address of the section
        :param owner: owner of the section; optional to stay compatible with existing callers
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(data)
        self.flags = MemoryFlags(False, False)
        self.data = data
        # MemorySection.__repr__ reads self.owner. Without this assignment, building
        # the out-of-bounds messages below raised AttributeError.
        self.owner = owner

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Return a copy of ``size`` bytes starting at the section-relative ``offset``.

        :raises MemoryAccessException: if the requested range is not fully inside the section
        """
        # Negative values have to be rejected explicitly. They pass the
        # upper-bound test and would slice from the end of the buffer.
        if offset < 0 or size < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read')
        return self.data[offset:offset + size]

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Overwrite ``size`` bytes at the section-relative ``offset`` with the first ``size`` bytes of ``data``.

        :raises MemoryAccessException: if the range is not fully inside the section, or ``data`` holds fewer
                                       than ``size`` bytes
        """
        if offset < 0 or size < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write')
        payload = data[0:size]
        if len(payload) != size:
            # a short payload would shrink the backing buffer on slice assignment
            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
        self.data[offset:offset + size] = payload

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Not supported, the section is not executable.

        :raises MemoryAccessException: always
        """
        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
                                    offset, 4, 'instruction fetch')

@ -1,6 +1,6 @@
from unittest import TestCase from unittest import TestCase
from riscemu.Tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA
def ins(name: str) -> Token: def ins(name: str) -> Token:

Loading…
Cancel
Save