[restructured] moved all simple type definitions into riscemu.types

assembly-parser-rework
Anton Lydike 3 years ago
parent 254410e9cc
commit bc26ed3a02

@ -12,12 +12,11 @@ from typing import List, Type
import riscemu
from .config import RunConfig
from .MMU import MMU
from .base import BinaryDataMemorySection
from .colors import FMT_CPU, FMT_NONE
from .debug import launch_debug_session
from .exceptions import RiscemuBaseException, LaunchDebuggerException
from .syscall import SyscallInterface, get_syscall_symbols
from .types import CPU, ProgramLoader, Int32
from .types import CPU, ProgramLoader, Int32, BinaryDataMemorySection
from .parser import AssemblyFileLoader
if typing.TYPE_CHECKING:

@ -2,12 +2,11 @@ from enum import Enum, auto
from typing import List
from typing import Optional, Tuple, Union
from .base import BinaryDataMemorySection, InstructionMemorySection
from .colors import FMT_PARSE, FMT_NONE
from .exceptions import ParseException, ASSERT_LEN
from .helpers import parse_numeric_argument, align_addr, get_section_base_name
from .tokenizer import Token
from .types import Program, T_RelativeAddress, InstructionContext, Instruction, UInt32, Int32
from .types import Program, T_RelativeAddress, InstructionContext, Instruction, BinaryDataMemorySection, InstructionMemorySection
INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini')
"""

@ -1,82 +0,0 @@
"""
This file contains a base implementation of Instruction, and MemorySection.
This aims to be a simple base, usable for everyone who needs the basic functionality, but doesn't
want to set up their own subtypes of Instruction and MemorySection
"""
from typing import List, Tuple, Union
from .exceptions import MemoryAccessException
from .helpers import parse_numeric_argument
from .types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \
T_AbsoluteAddress, Program
class SimpleInstruction(Instruction):
    """
    Straightforward Instruction implementation: it keeps the mnemonic, the raw string
    arguments, the owning InstructionContext and its own (relative) address, and
    resolves arguments on demand.
    """

    def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]],
                 context: InstructionContext, addr: T_RelativeAddress):
        self.name, self.args = name, args
        self.context, self.addr = context, addr

    def get_imm(self, num: int) -> int:
        """
        Return argument `num` as an immediate.

        The argument is first looked up as a label through the context (relative to this
        instruction's address); only when that yields None is it handed to
        parse_numeric_argument.
        """
        raw = self.args[num]
        target = self.context.resolve_label(raw, self.addr)
        return parse_numeric_argument(raw) if target is None else target

    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        Return (immediate from argument num + 1, register name from argument num).
        """
        immediate = self.get_imm(num + 1)
        return immediate, self.get_reg(num)

    def get_reg(self, num: int) -> str:
        """
        Return argument `num` unchanged, it is used as the register name.
        """
        return self.args[num]
class InstructionMemorySection(MemorySection):
    """
    Memory section that holds a list of Instruction objects instead of raw bytes.

    Each instruction accounts for four bytes of the section size. Raw byte access
    (read / write) is always refused, only read_ins is supported.
    """

    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0):
        self.instructions = instructions
        self.size = len(instructions) * 4
        # positional MemoryFlags arguments are (read_only, executable)
        self.flags = MemoryFlags(True, True)
        self.name, self.owner = name, owner
        self.base, self.context = base, context

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Always raises MemoryAccessException, reporting the absolute address (base + offset).
        """
        raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Always raises MemoryAccessException, reporting the absolute address (base + offset).
        """
        raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write')

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Return the instruction stored at the section-relative byte `offset`.

        :raises MemoryAccessException: if offset is not a multiple of four
        """
        if offset % 4 == 0:
            return self.instructions[offset // 4]
        raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')
class BinaryDataMemorySection(MemorySection):
    """
    Memory section backed by a bytearray.

    Raw byte reads and writes are served from the backing buffer, instruction
    fetches are always refused.
    """

    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0,
                 flags: MemoryFlags = None):
        """
        :param data: backing buffer, its length becomes the section size
        :param name: name of the section
        :param context: InstructionContext stored on the section
        :param owner: stored unchanged in the owner attribute
        :param base: base address of the section
        :param flags: section flags; MemoryFlags(False, False) is used when omitted
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(data)
        self.flags = flags if flags is not None else MemoryFlags(False, False)
        self.data = data
        self.owner = owner

    def _check_bounds(self, offset: T_RelativeAddress, size: int, op: str):
        """
        Raise MemoryAccessException unless [offset, offset + size) lies inside the section.

        Negative values are rejected explicitly: a python slice would otherwise wrap around
        to the end of the buffer (and a slice assignment could even change its length).
        """
        if offset < 0 or size < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, op)

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Return `size` bytes starting at the section-relative `offset`.

        :raises MemoryAccessException: if the requested range is not fully inside the section
        """
        self._check_bounds(offset, size, 'read')
        return self.data[offset:offset + size]

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Overwrite `size` bytes at the section-relative `offset` with the first `size` bytes of `data`.

        :raises MemoryAccessException: if the range is not fully inside the section, or if
            `data` holds fewer than `size` bytes
        """
        self._check_bounds(offset, size, 'write')
        payload = data[0:size]
        if len(payload) != size:
            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
        self.data[offset:offset + size] = payload

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Always raises MemoryAccessException, this section type holds no instructions.
        """
        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
                                    offset, 4, 'instruction fetch')

@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT
"""
import os.path
from .base import SimpleInstruction
from .types import SimpleInstruction
from .helpers import *
if typing.TYPE_CHECKING:

@ -1,6 +1,5 @@
from riscemu import RunConfig
from riscemu.base import InstructionMemorySection, SimpleInstruction
from riscemu.types import InstructionContext, Program
from riscemu.types import InstructionMemorySection, SimpleInstruction, Program
if __name__ == '__main__':
from .CPU import UserModeCPU

@ -3,17 +3,15 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import os
import re
from typing import Dict, Tuple, Iterable, Callable, List
from .helpers import Peekable
from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
from .types import Program, T_ParserOpts, ProgramLoader
from .colors import FMT_PARSE
from .exceptions import ParseException
from .helpers import Peekable
from .tokenizer import Token, TokenType, tokenize
from .base import SimpleInstruction
from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction
def parse_instruction(token: Token, args: Tuple[str], context: ParseContext):

@ -191,6 +191,7 @@ class PrivCPU(CPU):
trap = self.pending_traps.pop() # use the most recent trap
if self.conf.verbosity > 0:
print(FMT_CPU + "[CPU] taking trap {}!".format(trap) + FMT_NONE)
self.regs.dump_reg_a()
if trap.priv != PrivModes.MACHINE:
print(FMT_CPU + "[CPU] Trap not targeting machine mode encountered! - undefined behaviour!" + FMT_NONE)

@ -4,12 +4,11 @@ from dataclasses import dataclass
from functools import lru_cache
from typing import Tuple, Dict, Set
from riscemu import MemoryAccessException
from riscemu.priv.Exceptions import InstructionAccessFault, InstructionAddressMisalignedTrap, LoadAccessFault
from riscemu.types import Instruction, InstructionContext, T_RelativeAddress, MemoryFlags, T_AbsoluteAddress
from riscemu.base import BinaryDataMemorySection
from riscemu.colors import FMT_NONE, FMT_PARSE
from riscemu.decoder import format_ins, RISCV_REGS, decode
from riscemu.priv.Exceptions import InstructionAccessFault, InstructionAddressMisalignedTrap, LoadAccessFault
from riscemu.types import Instruction, InstructionContext, T_RelativeAddress, MemoryFlags, T_AbsoluteAddress, \
BinaryDataMemorySection
@dataclass(frozen=True)

@ -1,656 +0,0 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
This file contains abstract base classes and types, bundling only the absolute basic functionality
See base.py for some basic implementations of these classes
"""
import os
import re
import typing
from abc import ABC, abstractmethod
from collections import defaultdict
from ctypes import c_uint32, c_int32
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Set, Union, Iterator, Callable, Type
from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_RED, FMT_BOLD
from .config import RunConfig
from .exceptions import ParseException
from .helpers import format_bytes, get_section_base_name
from .registers import Registers
if typing.TYPE_CHECKING:
from .MMU import MMU
from .instructions.instruction_set import InstructionSet
# define some base type aliases so we can keep track of absolute and relative addresses
T_RelativeAddress = int
T_AbsoluteAddress = int
# parser options are just dictionaries with arbitrary values
# (typing.Any is the "arbitrary value" type; the builtin function `any` is not a type)
T_ParserOpts = Dict[str, typing.Any]
# matches numbered label references: one or more digits followed by a direction suffix ('f' or 'b')
NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
class Int32:
    """
    Signed 32 bit integer with wrap-around semantics, stored in a ctypes c_int32.

    Binary operators accept another Int32 (or subclass) or a plain int and return a new
    instance of ``self.__class__``; comparisons return plain bools. Subclasses select a
    different ctypes storage type through the ``_type`` class attribute.
    """
    _type = c_int32
    __slots__ = ('_val',)

    def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0):
        """
        :param val: initial value. bytes / bytearray are decoded as a little endian signed
            integer; ctypes 32 bit integers, Int32 instances and plain ints are converted
            to this class' storage type (wrapping as the ctypes type does).
        :raises RuntimeError: for any other input type
        """
        if isinstance(val, (bytes, bytearray)):
            self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True))
        elif isinstance(val, self.__class__._type):
            # already the right storage type, it is stored as-is (not copied)
            self._val = val
        elif isinstance(val, (c_uint32, c_int32, Int32)):
            self._val = self.__class__._type(val.value)
        elif isinstance(val, int):
            self._val = self.__class__._type(val)
        else:
            raise RuntimeError(
                "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val)
            )

    @staticmethod
    def _unwrap(other):
        """Return the plain int behind `other` if it is an Int32, otherwise `other` unchanged."""
        return other.value if isinstance(other, Int32) else other

    def __add__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value + self._unwrap(other))

    def __sub__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value - self._unwrap(other))

    def __mul__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value * self._unwrap(other))

    def __truediv__(self, other):
        # there is no fractional result type, `/` behaves exactly like `//`
        return self // other

    def __floordiv__(self, other):
        return self.__class__(self.value // self._unwrap(other))

    def __mod__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value % self._unwrap(other))

    def __and__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value & self._unwrap(other))

    def __or__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value | self._unwrap(other))

    def __xor__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value ^ self._unwrap(other))

    def __lshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value << self._unwrap(other))

    def __rshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value >> self._unwrap(other))

    def __eq__(self, other: Union['Int32', int]):
        return self.value == self._unwrap(other)

    def __neg__(self):
        return self.__class__(-self._val.value)

    def __abs__(self):
        return self.__class__(abs(self.value))

    def __bytes__(self):
        # the bytes() builtin insists on a real bytes object, a bytearray makes it raise TypeError
        return bytes(self.to_bytes(4))

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, self.value)

    def __str__(self):
        return str(self.value)

    def __format__(self, format_spec):
        return self.value.__format__(format_spec)

    def __hash__(self):
        # defined explicitly because __eq__ is overridden; equal to the hash of the plain int
        return hash(self.value)

    def __gt__(self, other):
        return self.value > self._unwrap(other)

    def __lt__(self, other):
        return self.value < self._unwrap(other)

    def __le__(self, other):
        return self.value <= self._unwrap(other)

    def __ge__(self, other):
        return self.value >= self._unwrap(other)

    def __bool__(self):
        return bool(self.value)

    def __cmp__(self, other):
        """
        Three-way comparison, returns -1, 0 or 1.
        (python 3 ints have no __cmp__ to delegate to, so the result is computed here)
        """
        other = self._unwrap(other)
        return (self.value > other) - (self.value < other)

    # right handed binary operators
    def __radd__(self, other):
        return self + other

    def __rsub__(self, other):
        return self.__class__(other) - self

    def __rmul__(self, other):
        return self * other

    def __rtruediv__(self, other):
        return self.__class__(other) // self

    def __rfloordiv__(self, other):
        return self.__class__(other) // self

    def __rmod__(self, other):
        return self.__class__(other) % self

    def __rand__(self, other):
        return self.__class__(other) & self

    def __ror__(self, other):
        return self.__class__(other) | self

    def __rxor__(self, other):
        return self.__class__(other) ^ self

    @property
    def value(self):
        """The value as a plain python int, as reported by the ctypes storage."""
        return self._val.value

    def unsigned(self) -> 'UInt32':
        """Return this value converted to a UInt32."""
        return UInt32(self)

    def to_bytes(self, bytes: int = 4) -> bytearray:
        """
        Return the little endian encoding of the unsigned value.

        :param bytes: number of bytes to produce. Fewer than four keeps only the low order
            bytes (instead of raising OverflowError), more than four pads with zero bytes.
        """
        encoded = self.unsigned_value.to_bytes(max(bytes, 4), 'little')
        return bytearray(encoded[:bytes])

    def signed(self) -> 'Int32':
        """Return this value as a plain Int32 (self, if it already is exactly an Int32)."""
        if self.__class__ == Int32:
            return self
        return Int32(self)

    @property
    def unsigned_value(self):
        """The value reinterpreted through c_uint32, as a plain python int."""
        return c_uint32(self.value).value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Shift right by `ammount` bits, shifting in zeros.
        The value is reduced modulo 2**32 first so negative numbers do not sign-extend.
        """
        return self.__class__((self.value % 0x100000000) >> self._unwrap(ammount))

    def __int__(self):
        return self.value

    def __hex__(self):
        return hex(self.value)
class UInt32(Int32):
    """
    Unsigned counterpart of Int32: same interface, but values are stored in a c_uint32.
    """
    _type = c_uint32

    def unsigned(self) -> 'UInt32':
        """Already unsigned, so the instance itself is returned."""
        return self

    @property
    def unsigned_value(self):
        """The stored value, read directly from the c_uint32 storage."""
        stored = self._val
        return stored.value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """Delegates to the regular `>>` operator."""
        shifted = self >> ammount
        return shifted
@dataclass(frozen=True)
class MemoryFlags:
    """
    Immutable pair of section flags, rendered in a short ``rwx``-like notation.
    """
    read_only: bool
    executable: bool

    def __repr__(self):
        # the leading 'r' is always printed; the other two positions are 'w' / 'x' or '-'
        write_char = '-' if self.read_only else 'w'
        exec_char = 'x' if self.executable else '-'
        return "r{}{}".format(write_char, exec_char)
class InstructionContext:
    """
    Symbol information for a block of instructions: named labels, numbered labels,
    the block's base address and a dictionary of global symbols.
    """

    # The address where the instruction block is placed
    base_address: T_AbsoluteAddress

    # This dictionary maps all labels to their relative position of the instruction block
    labels: Dict[str, T_RelativeAddress]

    # This dictionary maps numbered labels (which can occur multiple times) to a list of
    # (block-relative) addresses where the label was placed
    numbered_labels: Dict[str, List[T_RelativeAddress]]

    # A reference to the MMU's global symbol dictionary for access to global symbols
    global_symbol_dict: Dict[str, T_AbsoluteAddress]

    def __init__(self):
        self.base_address = 0
        self.labels = dict()
        self.numbered_labels = defaultdict(list)
        self.global_symbol_dict = dict()

    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]:
        """
        Look up `symbol` and return its address, or None if it is unknown.

        Symbols matching NUMBER_SYMBOL_PATTERN are numbered labels: with a trailing 'b' the
        highest placement strictly below `address_at` is returned, otherwise the lowest one
        strictly above it (base_address is added in both cases). Every other symbol is looked
        up in the local labels first and in the global symbol dictionary second.

        :raises ParseException: if a numbered label is requested without `address_at`
        """
        if not NUMBER_SYMBOL_PATTERN.match(symbol):
            if symbol in self.labels:
                # local symbols take precedence
                return self.labels.get(symbol, None)
            # if it's not a local symbol, try the globals
            return self.global_symbol_dict.get(symbol, None)

        if address_at is None:
            raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))

        number, direction = symbol[:-1], symbol[-1]
        # .get() on purpose: unknown numbers must not create entries in the defaultdict
        placements = self.numbered_labels.get(number, [])
        if direction == 'b':
            before = [addr + self.base_address for addr in placements if addr < address_at]
            return max(before, default=None)
        after = [addr + self.base_address for addr in placements if addr > address_at]
        return min(after, default=None)
class Instruction(ABC):
    """
    Abstract instruction: a name, a tuple of arguments and accessors that interpret
    individual arguments as immediates or registers.
    """
    name: str
    args: tuple

    @abstractmethod
    def get_imm(self, num: int) -> int:
        """
        parse and get immediate argument
        """

    @abstractmethod
    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        parse and get an argument imm(reg)
        """

    @abstractmethod
    def get_reg(self, num: int) -> str:
        """
        parse and get an register argument
        """

    def __repr__(self):
        # "<name> <arg>, <arg>, ..."
        joined_args = ", ".join(self.args)
        return "{} {}".format(self.name, joined_args)
@dataclass
class MemorySection(ABC):
    """
    Abstract description of a contiguous memory region (name, flags, size, base, owner and
    context) together with the access methods concrete sections have to implement and
    helpers for printing the section's contents.
    """
    name: str
    flags: MemoryFlags
    size: int
    base: T_AbsoluteAddress
    owner: str
    context: InstructionContext

    @property
    def end(self):
        """base + size"""
        return self.base + self.size

    @abstractmethod
    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """Read `size` bytes at the section-relative `offset`."""

    @abstractmethod
    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """Write `size` bytes of `data` at the section-relative `offset`."""

    @abstractmethod
    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """Fetch the instruction at the section-relative `offset`."""

    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
        """
        Print the contents of the section between the relative offsets `start` and `end`.

        When `end` is omitted, a window of roughly `rows` rows around `start` is printed and
        the position of `start` is highlighted. Executable sections are printed one
        instruction per line (via read_ins), everything else as rows of bytes rendered by
        format_bytes.

        :param fmt: passed through to format_bytes
        :param bytes_per_row: bytes printed per line (forced to 4 for executable sections)
        :param rows: number of rows used to size the window when `end` is omitted
        :param group: passed through to format_bytes, also used to locate the highlighted group
        """
        is_code = self.flags.executable
        if is_code:
            bytes_per_row = 4

        highlight = None
        if end is None:
            half_window = bytes_per_row * (rows // 2)
            end = min(start + half_window, self.size - 1)
            highlight = start
            start = max(0, start - half_window)

        if is_code:
            header = "{}, viewing {} instructions:".format(self, (end - start) // 4)
            print(FMT_MEM + header + FMT_NONE)
            for addr in range(start, end, 4):
                if addr == highlight:
                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
                line = "0x{:04x}: {}{}".format(self.base + addr, self.read_ins(addr), FMT_NONE)
                print(line)
            return

        def highlighted_group(row_start):
            # index handed to format_bytes; -1 when nothing is highlighted
            return -1 if highlight is None else (highlight - row_start) // group

        header = "{}, viewing {} bytes:".format(self, (end - start))
        print(FMT_MEM + header + FMT_NONE)

        # full rows stop at the last multiple of bytes_per_row, the rest is printed separately
        tail_len = end % bytes_per_row
        aligned_end = end - tail_len
        for addr in range(start, aligned_end, bytes_per_row):
            hi_ind = highlighted_group(addr)
            row = format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind)
            print("0x{:04x}: {}{}".format(self.base + addr, row, FMT_NONE))

        if tail_len != 0:
            hi_ind = highlighted_group(aligned_end)
            row = format_bytes(self.read(aligned_end, tail_len), fmt, group, hi_ind)
            print("0x{:04x}: {}{}".format(self.base + aligned_end, row, FMT_NONE))

    def dump_all(self, *args, **kwargs):
        """Dump the whole section; extra arguments are forwarded to dump()."""
        self.dump(0, self.size, *args, **kwargs)

    def __repr__(self):
        details = (self.__class__.__name__, self.name, self.base, self.size, self.flags, self.owner)
        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(*details)
class Program:
    """
    This represents a collection of sections which together form an executable program

    When you want to create a program which can be located anywhere in memory, set base to None,
    this signals the other components, that this is relocatable. Set the base of each section to
    the offset in the program, and everything will be taken care of for you.
    """
    name: str
    context: 'InstructionContext'
    global_labels: Set[str]
    relative_labels: Set[str]
    sections: List['MemorySection']
    base: Optional['T_AbsoluteAddress']
    is_loaded: bool

    @property
    def size(self):
        """
        Distance from the program base to the end of the section with the highest base
        (0 for a program without sections). Without a base, section bases are used as-is.
        """
        if len(self.sections) == 0:
            return 0
        # add_section keeps the list ordered by base, so the last entry is the highest one
        last = self.sections[-1]
        if self.base is None:
            return last.base + last.size
        return (last.base - self.base) + last.size

    def __init__(self, name: str, base: Optional[int] = None):
        """
        :param name: name of the program
        :param base: address the program expects to be loaded at, None if relocatable
        """
        self.name = name
        self.context = InstructionContext()
        self.sections = []
        self.global_labels = set()
        self.relative_labels = set()
        self.base = base
        self.is_loaded = False

    def add_section(self, sec: 'MemorySection'):
        """
        Add a section to the program, keeping the section list ordered by base address.
        A warning is printed if the section starts before the program base.
        """
        # print a warning when a section is located before the programs base
        if self.base is not None:
            if sec.base < self.base:
                print(
                    FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format(
                        sec, self.name, self.base
                    ) + FMT_NONE)

        self.sections.append(sec)
        # keep section list ordered
        self.sections.sort(key=lambda section: section.base)

    def __repr__(self):
        # one placeholder per argument: the format string used to have one too few, which
        # shifted the values (labels shown as sections, section names as base, base dropped)
        return "{}(name={},globals={},sections={},base={})".format(
            self.__class__.__name__, self.name, self.global_labels,
            [s.name for s in self.sections], self.base
        )

    @property
    def entrypoint(self):
        """
        The address execution should start at: the `_start` label, else the `main` label,
        else the base of the first executable section whose base name is `.text`.
        None if none of these exist.
        """
        if '_start' in self.context.labels:
            return self.context.labels.get('_start')
        if 'main' in self.context.labels:
            return self.context.labels.get('main')
        for sec in self.sections:
            if get_section_base_name(sec.name) == '.text' and sec.flags.executable:
                return sec.base
        return None

    def loaded_trigger(self, at_addr: 'T_AbsoluteAddress'):
        """
        This trigger is called when the binary is loaded and its final address in memory is determined

        This will do a small sanity check to prevent programs loading twice, or at addresses they don't
        expect to be loaded.

        Then it will finalize all relative symbols defined in it to point to the correct addresses.

        :param at_addr: the address where the program will be located
        :raises RuntimeError: if the program was already loaded at a different address
        """
        if self.is_loaded:
            if at_addr != self.base:
                raise RuntimeError("Program loaded twice at different addresses! This will probably break things!")
            return

        if self.base is not None and self.base != at_addr:
            print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, '
                            'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE)

        # check if we are relocating
        if self.base != at_addr:
            offset = at_addr if self.base is None else at_addr - self.base
            # move all sections by the offset
            for sec in self.sections:
                sec.base += offset
            # move all relative symbols by the offset
            for name in self.relative_labels:
                self.context.labels[name] += offset

        self.base = at_addr
        self.context.base_address = at_addr
class ProgramLoader(ABC):
    """
    A program loader is always specific to a given source file. It is a place to store all state
    concerning the parsing and loading of that specific source file, including options.
    """

    def __init__(self, source_path: str, options: 'T_ParserOpts'):
        """
        :param source_path: the path to the source file
        :param options: the parser options for this file
        """
        self.source_path = source_path
        self.options = options
        # last path component of source_path
        self.filename = os.path.split(self.source_path)[-1]

    @classmethod
    @abstractmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Return confidence that the file located at source_path
        should be parsed and loaded by this loader

        :param source_path: the path of the source file
        :return: the confidence that this file belongs to this parser
        """
        pass

    @classmethod
    @abstractmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], 'T_ParserOpts']:
        """
        parse command line args into an options dictionary

        :param argv: the command line args list
        :return: all remaining command line args and the parser options object
        """
        pass

    @classmethod
    def instantiate(cls, source_path: str, options: 'T_ParserOpts') -> 'ProgramLoader':
        """
        Instantiate a loader for the given source file with the required arguments

        :param source_path: the path to the source file
        :param options: the parsed options (guaranteed to come from this classes get_options method.
        :return: An instance of a ProgramLoader for the specified source
        """
        return cls(source_path, options)

    @abstractmethod
    def parse(self) -> Union['Program', Iterator['Program']]:
        """
        Parse the source file this loader was created for.

        :return: a single Program, or an iterator over Programs
        """
        pass
class CPU(ABC):
    """
    Abstract CPU: owns the registers, a reference to the MMU, the program counter and the
    table mapping instruction names to their implementations. Concrete CPUs provide
    step(), run() and get_loaders().
    """
    # static cpu configuration
    INS_XLEN: int = 4

    # housekeeping variables
    regs: Registers
    mmu: 'MMU'
    pc: T_AbsoluteAddress
    cycle: int
    halted: bool

    # debugging context
    debugger_active: bool

    # instruction information
    instructions: Dict[str, Callable[[Instruction], None]]
    instruction_sets: Set['InstructionSet']

    # configuration
    conf: RunConfig

    def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig):
        """
        :param mmu: the MMU this cpu uses
        :param instruction_sets: instruction set classes; each one is instantiated with this
            cpu and the result of its load() is merged into the instruction table
        :param conf: the run configuration
        """
        self.mmu = mmu
        self.conf = conf
        self.regs = Registers()

        self.instructions = dict()
        self.instruction_sets = set()
        # the instruction sets receive the cpu while it is still being set up
        for set_class in instruction_sets:
            ins_set = set_class(self)
            self.instructions.update(ins_set.load())
            self.instruction_sets.add(ins_set)

        self.halted = False
        self.cycle = 0
        self.pc = 0
        self.debugger_active = False

    def run_instruction(self, ins: Instruction):
        """
        Execute a single instruction

        :param ins: The instruction to execute
        :raises RuntimeError: if no implementation is registered under the instruction's name
        """
        if ins.name not in self.instructions:
            # this should never be reached, as unknown instructions are imparseable
            raise RuntimeError("Unknown instruction: {}".format(ins))
        self.instructions[ins.name](ins)

    def load_program(self, program: Program):
        """Hand the program to the MMU for loading."""
        self.mmu.load_program(program)

    def __repr__(self):
        """
        Returns a representation of the CPU and some of its state.
        """
        set_names = " ".join(s.name for s in self.instruction_sets)
        return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format(
            self.__class__.__name__, self.pc, self.cycle, self.halted, set_names
        )

    @abstractmethod
    def step(self, verbose=False):
        pass

    @abstractmethod
    def run(self, verbose=False):
        pass

    def launch(self, program: Program, verbose: bool = False):
        """
        Set the program counter to the program's entrypoint and call run().
        Prints an error and does nothing if the program is not among the MMU's programs.
        """
        if program in self.mmu.programs:
            self.pc = program.entrypoint
            self.run(verbose)
        else:
            print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE)

    @classmethod
    @abstractmethod
    def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
        pass

    def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]:
        """Return the loader class reporting the highest can_parse() confidence for `file_name`."""
        def confidence(loader):
            return loader.can_parse(file_name)

        return max(self.get_loaders(), key=confidence)

    @property
    def sections(self):
        """The MMU's sections."""
        return self.mmu.sections

    @property
    def programs(self):
        """The MMU's programs."""
        return self.mmu.programs

@ -0,0 +1,26 @@
import re
from typing import Any, Dict
# define some base type aliases so we can keep track of absolute and relative addresses
T_RelativeAddress = int
T_AbsoluteAddress = int
# parser options are just dictionaries with arbitrary values
# (typing.Any is the "arbitrary value" type; the builtin function `any` is not a type)
T_ParserOpts = Dict[str, Any]
# matches numbered label references: one or more digits followed by a direction suffix ('f' or 'b')
NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
from .flags import MemoryFlags
from .int32 import UInt32, Int32
from .instruction import Instruction
from .instruction_context import InstructionContext
from .memory_section import MemorySection
from .program import Program
from .program_loader import ProgramLoader
from .cpu import CPU
from .simple_instruction import SimpleInstruction
from .instruction_memory_section import InstructionMemorySection
from .binary_data_memory_section import BinaryDataMemorySection

@ -0,0 +1,29 @@
from . import MemorySection, InstructionContext, MemoryFlags, T_RelativeAddress, Instruction
from ..exceptions import MemoryAccessException
class BinaryDataMemorySection(MemorySection):
    """
    Memory section backed by a bytearray.

    Raw byte reads and writes are served from the backing buffer, instruction
    fetches are always refused.
    """

    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0,
                 flags: MemoryFlags = None):
        """
        :param data: backing buffer, its length becomes the section size
        :param name: name of the section
        :param context: InstructionContext stored on the section
        :param owner: stored unchanged in the owner attribute
        :param base: base address of the section
        :param flags: section flags; MemoryFlags(False, False) is used when omitted
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(data)
        self.flags = flags if flags is not None else MemoryFlags(False, False)
        self.data = data
        self.owner = owner

    def _check_bounds(self, offset: T_RelativeAddress, size: int, op: str):
        """
        Raise MemoryAccessException unless [offset, offset + size) lies inside the section.

        Negative values are rejected explicitly: a python slice would otherwise wrap around
        to the end of the buffer (and a slice assignment could even change its length).
        """
        if offset < 0 or size < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, op)

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Return `size` bytes starting at the section-relative `offset`.

        :raises MemoryAccessException: if the requested range is not fully inside the section
        """
        self._check_bounds(offset, size, 'read')
        return self.data[offset:offset + size]

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Overwrite `size` bytes at the section-relative `offset` with the first `size` bytes of `data`.

        :raises MemoryAccessException: if the range is not fully inside the section, or if
            `data` holds fewer than `size` bytes
        """
        self._check_bounds(offset, size, 'write')
        payload = data[0:size]
        if len(payload) != size:
            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
        self.data[offset:offset + size] = payload

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Always raises MemoryAccessException, this section type holds no instructions.
        """
        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
                                    offset, 4, 'instruction fetch')

@ -0,0 +1,107 @@
import typing
from abc import ABC, abstractmethod
from typing import List, Type, Callable, Set, Dict
from ..registers import Registers
from ..config import RunConfig
from ..colors import FMT_RED, FMT_NONE
from . import T_AbsoluteAddress, Instruction, Program, ProgramLoader
class CPU(ABC):
    """
    Abstract CPU: owns the registers, a reference to the MMU, the program counter and the
    table mapping instruction names to their implementations. Concrete CPUs provide
    step(), run() and get_loaders().
    """
    # static cpu configuration
    INS_XLEN: int = 4

    # housekeeping variables
    regs: Registers
    mmu: 'MMU'
    pc: T_AbsoluteAddress
    cycle: int
    halted: bool

    # debugging context
    debugger_active: bool

    # instruction information
    instructions: Dict[str, Callable[[Instruction], None]]
    instruction_sets: Set['InstructionSet']

    # configuration
    conf: RunConfig

    def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig):
        """
        :param mmu: the MMU this cpu uses
        :param instruction_sets: instruction set classes; each one is instantiated with this
            cpu and the result of its load() is merged into the instruction table
        :param conf: the run configuration
        """
        self.mmu = mmu
        self.conf = conf
        self.regs = Registers()

        self.instructions = dict()
        self.instruction_sets = set()
        # the instruction sets receive the cpu while it is still being set up
        for set_class in instruction_sets:
            ins_set = set_class(self)
            self.instructions.update(ins_set.load())
            self.instruction_sets.add(ins_set)

        self.halted = False
        self.cycle = 0
        self.pc = 0
        self.debugger_active = False

    def run_instruction(self, ins: Instruction):
        """
        Execute a single instruction

        :param ins: The instruction to execute
        :raises RuntimeError: if no implementation is registered under the instruction's name
        """
        if ins.name not in self.instructions:
            # this should never be reached, as unknown instructions are imparseable
            raise RuntimeError("Unknown instruction: {}".format(ins))
        self.instructions[ins.name](ins)

    def load_program(self, program: Program):
        """Hand the program to the MMU for loading."""
        self.mmu.load_program(program)

    def __repr__(self):
        """
        Returns a representation of the CPU and some of its state.
        """
        set_names = " ".join(s.name for s in self.instruction_sets)
        return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format(
            self.__class__.__name__, self.pc, self.cycle, self.halted, set_names
        )

    @abstractmethod
    def step(self, verbose=False):
        pass

    @abstractmethod
    def run(self, verbose=False):
        pass

    def launch(self, program: Program, verbose: bool = False):
        """
        Set the program counter to the program's entrypoint and call run().
        Prints an error and does nothing if the program is not among the MMU's programs.
        """
        if program in self.mmu.programs:
            self.pc = program.entrypoint
            self.run(verbose)
        else:
            print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE)

    @classmethod
    @abstractmethod
    def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
        pass

    def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]:
        """Return the loader class reporting the highest can_parse() confidence for `file_name`."""
        def confidence(loader):
            return loader.can_parse(file_name)

        return max(self.get_loaders(), key=confidence)

    @property
    def sections(self):
        """The MMU's sections."""
        return self.mmu.sections

    @property
    def programs(self):
        """The MMU's programs."""
        return self.mmu.programs

@ -0,0 +1,13 @@
from dataclasses import dataclass
@dataclass(frozen=True)
class MemoryFlags:
    """
    Immutable pair of section flags, rendered in a short ``rwx``-like notation.
    """
    read_only: bool
    executable: bool

    def __repr__(self):
        # the leading 'r' is always printed; the other two positions are 'w' / 'x' or '-'
        write_char = '-' if self.read_only else 'w'
        exec_char = 'x' if self.executable else '-'
        return "r{}{}".format(write_char, exec_char)

@ -0,0 +1,31 @@
from abc import ABC, abstractmethod
from typing import Tuple
class Instruction(ABC):
    """
    Abstract instruction: a name, a tuple of arguments and accessors that interpret
    individual arguments as immediates or registers.
    """
    name: str
    args: tuple

    @abstractmethod
    def get_imm(self, num: int) -> int:
        """
        parse and get immediate argument
        """

    @abstractmethod
    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        parse and get an argument imm(reg)
        """

    @abstractmethod
    def get_reg(self, num: int) -> str:
        """
        parse and get an register argument
        """

    def __repr__(self):
        # "<name> <arg>, <arg>, ..."
        joined_args = ", ".join(self.args)
        return "{} {}".format(self.name, joined_args)

@ -0,0 +1,53 @@
from collections import defaultdict
from typing import Dict, List, Optional
from ..exceptions import ParseException
from ..types import T_AbsoluteAddress, T_RelativeAddress, NUMBER_SYMBOL_PATTERN
class InstructionContext:
    """
    Symbol information for a block of instructions: named labels, numbered labels,
    the block's base address and a dictionary of global symbols.
    """

    # The address where the instruction block is placed
    base_address: T_AbsoluteAddress

    # This dictionary maps all labels to their relative position of the instruction block
    labels: Dict[str, T_RelativeAddress]

    # This dictionary maps numbered labels (which can occur multiple times) to a list of
    # (block-relative) addresses where the label was placed
    numbered_labels: Dict[str, List[T_RelativeAddress]]

    # A reference to the MMU's global symbol dictionary for access to global symbols
    global_symbol_dict: Dict[str, T_AbsoluteAddress]

    def __init__(self):
        self.base_address = 0
        self.labels = dict()
        self.numbered_labels = defaultdict(list)
        self.global_symbol_dict = dict()

    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]:
        """
        Look up `symbol` and return its address, or None if it is unknown.

        Symbols matching NUMBER_SYMBOL_PATTERN are numbered labels: with a trailing 'b' the
        highest placement strictly below `address_at` is returned, otherwise the lowest one
        strictly above it (base_address is added in both cases). Every other symbol is looked
        up in the local labels first and in the global symbol dictionary second.

        :raises ParseException: if a numbered label is requested without `address_at`
        """
        if not NUMBER_SYMBOL_PATTERN.match(symbol):
            if symbol in self.labels:
                # local symbols take precedence
                return self.labels.get(symbol, None)
            # if it's not a local symbol, try the globals
            return self.global_symbol_dict.get(symbol, None)

        if address_at is None:
            raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))

        number, direction = symbol[:-1], symbol[-1]
        # .get() on purpose: unknown numbers must not create entries in the defaultdict
        placements = self.numbered_labels.get(number, [])
        if direction == 'b':
            before = [addr + self.base_address for addr in placements if addr < address_at]
            return max(before, default=None)
        after = [addr + self.base_address for addr in placements if addr > address_at]
        return min(after, default=None)

@ -0,0 +1,27 @@
from typing import List
from . import MemorySection, Instruction, InstructionContext, MemoryFlags, T_RelativeAddress
from .. import MemoryAccessException
class InstructionMemorySection(MemorySection):
    """
    Memory section that holds a list of Instruction objects instead of raw bytes.

    Each instruction accounts for four bytes of the section size. Raw byte access
    (read / write) is always refused, only read_ins is supported.
    """

    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0):
        self.instructions = instructions
        self.size = len(instructions) * 4
        # positional MemoryFlags arguments are (read_only, executable)
        self.flags = MemoryFlags(True, True)
        self.name, self.owner = name, owner
        self.base, self.context = base, context

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Always raises MemoryAccessException, reporting the absolute address (base + offset).
        """
        raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Always raises MemoryAccessException, reporting the absolute address (base + offset).
        """
        raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write')

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Return the instruction stored at the section-relative byte `offset`.

        :raises MemoryAccessException: if offset is not a multiple of four
        """
        if offset % 4 == 0:
            return self.instructions[offset // 4]
        raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')

@ -0,0 +1,202 @@
from typing import Union
from ctypes import c_int32, c_uint32
class Int32:
    """
    A signed 32 bit integer, backed by ``ctypes.c_int32``.

    All arithmetic and bitwise operators accept either another ``Int32`` (or
    subclass) or a plain ``int`` and return a new instance of the left operand's
    class, so results wrap around at 32 bits. Comparison operators compare the
    numeric values and return plain booleans.
    """
    _type = c_int32
    __slots__ = ('_val',)

    def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0):
        """
        :param val: the initial value. ``bytes``/``bytearray`` are interpreted as a
                    little endian signed integer, ctypes integers and other ``Int32``
                    instances are converted by value, plain ints are wrapped to 32 bit.
        :raises RuntimeError: if val is of an unsupported type
        """
        if isinstance(val, (bytes, bytearray)):
            self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True))
        elif isinstance(val, self.__class__._type):
            # a ctypes object of the matching type is stored as-is (not copied)
            self._val = val
        elif isinstance(val, (c_uint32, c_int32, Int32)):
            self._val = self.__class__._type(val.value)
        elif isinstance(val, int):
            self._val = self.__class__._type(val)
        else:
            raise RuntimeError(
                "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val)
            )

    def __add__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value + other)

    def __sub__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value - other)

    def __mul__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value * other)

    def __truediv__(self, other):
        # there is no fractional result for integers, so "/" behaves like "//"
        return self // other

    def __floordiv__(self, other):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self.value // other)

    def __mod__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value % other)

    def __and__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value & other)

    def __or__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value | other)

    def __xor__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self._val.value ^ other)

    def __lshift__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self.value << other)

    def __rshift__(self, other: Union['Int32', int]):
        # shifts the (possibly negative) python int, i.e. an arithmetic shift for Int32
        if isinstance(other, Int32):
            other = other.value
        return self.__class__(self.value >> other)

    def __eq__(self, other: Union['Int32', int]):
        if isinstance(other, Int32):
            other = other.value
        return self.value == other

    def __neg__(self):
        return self.__class__(-self._val.value)

    def __abs__(self):
        return self.__class__(abs(self.value))

    def __bytes__(self):
        # bytes(obj) requires a real ``bytes`` object, returning the bytearray
        # produced by to_bytes() would raise a TypeError
        return bytes(self.to_bytes(4))

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, self.value)

    def __str__(self):
        return str(self.value)

    def __format__(self, format_spec):
        return self.value.__format__(format_spec)

    def __hash__(self):
        return hash(self.value)

    def __gt__(self, other):
        if isinstance(other, Int32):
            other = other.value
        return self.value > other

    def __lt__(self, other):
        if isinstance(other, Int32):
            other = other.value
        return self.value < other

    def __le__(self, other):
        if isinstance(other, Int32):
            other = other.value
        return self.value <= other

    def __ge__(self, other):
        if isinstance(other, Int32):
            other = other.value
        return self.value >= other

    def __bool__(self):
        return bool(self.value)

    def __cmp__(self, other):
        """
        Three-way comparison: negative, zero or positive if self is smaller than,
        equal to or greater than other.
        """
        if isinstance(other, Int32):
            other = other.value
        # python 3 ints have no __cmp__, so compute the result directly
        return (self.value > other) - (self.value < other)

    # right handed binary operators

    def __radd__(self, other):
        return self + other

    def __rsub__(self, other):
        return self.__class__(other) - self

    def __rmul__(self, other):
        return self * other

    def __rtruediv__(self, other):
        return self.__class__(other) // self

    def __rfloordiv__(self, other):
        return self.__class__(other) // self

    def __rmod__(self, other):
        return self.__class__(other) % self

    def __rand__(self, other):
        return self.__class__(other) & self

    def __ror__(self, other):
        return self.__class__(other) | self

    def __rxor__(self, other):
        return self.__class__(other) ^ self

    @property
    def value(self):
        """The value as a plain python int."""
        return self._val.value

    def unsigned(self) -> 'UInt32':
        """Convert to an unsigned 32 bit integer."""
        return UInt32(self)

    def to_bytes(self, bytes: int = 4) -> bytearray:
        """
        Serialize the value as little endian bytes.

        :param bytes: the number of bytes to produce. If fewer than four are requested,
                      only the least significant bytes are returned; if more are
                      requested, the result is padded with zero bytes.
        :return: a bytearray of the requested length
        :raises ValueError: if a negative length is requested
        """
        if bytes < 0:
            raise ValueError("length argument must be non-negative")
        # always serialize at least the full word and truncate afterwards, converting
        # straight to fewer than four bytes overflows for values that don't fit
        width = max(bytes, 4)
        return bytearray(self.unsigned_value.to_bytes(width, 'little'))[:bytes]

    def signed(self) -> 'Int32':
        """Convert to a signed 32 bit integer (returns self if this already is a plain Int32)."""
        if self.__class__ == Int32:
            return self
        return Int32(self)

    @property
    def unsigned_value(self):
        """The value reinterpreted as an unsigned 32 bit python int."""
        return c_uint32(self.value).value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Logical (zero filling) right shift.

        :param ammount: the number of bits to shift by
        """
        if isinstance(ammount, Int32):
            ammount = ammount.value
        # map negative values to their unsigned 32 bit representation before shifting
        return self.__class__((self.value % 0x100000000) >> ammount)

    def __int__(self):
        return self.value

    def __hex__(self):
        return hex(self.value)
class UInt32(Int32):
    """
    Unsigned variant of Int32, backed by ``ctypes.c_uint32``.

    Everything not overridden here is inherited from Int32 and operates on the
    unsigned backing value.
    """
    _type = c_uint32

    def unsigned(self) -> 'UInt32':
        """This already is an unsigned integer, so no conversion takes place."""
        return self

    @property
    def unsigned_value(self):
        """The value as a plain python int, taken directly from the unsigned backing value."""
        return self.value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Logical right shift, delegates to the regular ``>>`` operator.

        :param ammount: the number of bits to shift by
        """
        shifted = self >> ammount
        return shifted

@ -0,0 +1,88 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional
from ..colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE
from ..helpers import format_bytes
from . import MemoryFlags, T_AbsoluteAddress, InstructionContext, T_RelativeAddress, Instruction
@dataclass
class MemorySection(ABC):
    """
    Abstract base of everything that can be mapped into memory.

    Subclasses have to implement raw byte access (``read``/``write``) and
    instruction fetching (``read_ins``); printing helpers are provided here.
    """
    name: str
    flags: MemoryFlags
    size: int
    base: T_AbsoluteAddress
    owner: str
    context: InstructionContext

    @property
    def end(self):
        """The first address after this section (base + size)."""
        return self.base + self.size

    @abstractmethod
    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """Read ``size`` bytes starting at ``offset`` (relative to the section start)."""
        pass

    @abstractmethod
    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """Write ``size`` bytes of ``data`` starting at ``offset`` (relative to the section start)."""
        pass

    @abstractmethod
    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """Fetch the instruction at ``offset`` (relative to the section start)."""
        pass

    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
        """
        Print a part of this section to stdout.

        When ``end`` is omitted, a window of roughly ``rows`` rows around ``start`` is
        printed and the position of ``start`` is highlighted.

        :param start: offset to start at (or to center the window on if end is None)
        :param end: offset to stop at (exclusive), or None to print a window around start
        :param fmt: format passed through to format_bytes
        :param bytes_per_row: bytes printed per row (forced to 4 for executable sections)
        :param rows: number of rows of the window when end is None
        :param group: group size passed through to format_bytes
        """
        is_code = self.flags.executable
        if is_code:
            bytes_per_row = 4

        marked = None
        if end is None:
            # center the window on the requested position
            half_window = bytes_per_row * (rows // 2)
            end = min(start + half_window, self.size - 1)
            marked = start
            start = max(0, start - half_window)

        if is_code:
            header = "{}, viewing {} instructions:".format(self, (end - start) // 4)
            print(FMT_MEM + header + FMT_NONE)
            for addr in range(start, end, 4):
                if addr == marked:
                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
                print("0x{:04x}: {}{}".format(self.base + addr, self.read_ins(addr), FMT_NONE))
            return

        header = "{}, viewing {} bytes:".format(self, (end - start))
        print(FMT_MEM + header + FMT_NONE)

        # full rows are printed first, a trailing partial row is handled separately
        leftover = end % bytes_per_row
        last_full_row_end = end - leftover if leftover else end
        for addr in range(start, last_full_row_end, bytes_per_row):
            if marked is None:
                marked_group = -1
            else:
                marked_group = (marked - addr) // group
            row = format_bytes(self.read(addr, bytes_per_row), fmt, group, marked_group)
            print("0x{:04x}: {}{}".format(self.base + addr, row, FMT_NONE))

        if last_full_row_end != end:
            if marked is None:
                marked_group = -1
            else:
                marked_group = (marked - last_full_row_end) // group
            row = format_bytes(self.read(last_full_row_end, end % bytes_per_row), fmt, group, marked_group)
            print("0x{:04x}: {}{}".format(self.base + last_full_row_end, row, FMT_NONE))

    def dump_all(self, *args, **kwargs):
        """Print the whole section, all further arguments are forwarded to dump()."""
        self.dump(0, self.size, *args, **kwargs)

    def __repr__(self):
        details = (
            self.__class__.__name__,
            self.name,
            self.base,
            self.size,
            self.flags,
            self.owner,
        )
        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(*details)

@ -0,0 +1,104 @@
from typing import List, Optional, Set
from ..colors import FMT_RED, FMT_BOLD, FMT_NONE, FMT_MEM
from ..helpers import get_section_base_name
from . import InstructionContext, T_AbsoluteAddress, MemorySection
class Program:
    """
    This represents a collection of sections which together form an executable program

    When you want to create a program which can be located anywhere in memory, set base to None,
    this signals the other components, that this is relocatable. Set the base of each section to
    the offset in the program, and everything will be taken care of for you.
    """
    name: str
    context: InstructionContext
    global_labels: Set[str]
    relative_labels: Set[str]
    sections: List[MemorySection]
    base: Optional[T_AbsoluteAddress]
    is_loaded: bool

    @property
    def size(self):
        """
        The distance from the program base (or from 0 for relocatable programs)
        to the end of the last section, 0 if there are no sections.
        """
        if not self.sections:
            return 0
        # sections are kept sorted by base address, so the last one ends the program
        last = self.sections[-1]
        if self.base is None:
            return last.base + last.size
        return (last.base - self.base) + last.size

    def __init__(self, name: str, base: Optional[int] = None):
        """
        :param name: the name of the program
        :param base: the address the program expects to be loaded at, None if relocatable
        """
        self.name = name
        self.context = InstructionContext()
        self.sections = []
        self.global_labels = set()
        self.relative_labels = set()
        self.base = base
        self.is_loaded = False

    def add_section(self, sec: MemorySection):
        """
        Add a section to the program, keeping the section list ordered by base address.

        :param sec: the section to add
        """
        # print a warning when a section is located before the programs base
        if self.base is not None and sec.base < self.base:
            print(
                FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format(
                    sec, self.name, self.base
                ) + FMT_NONE)

        self.sections.append(sec)
        # keep section list ordered
        self.sections.sort(key=lambda section: section.base)

    def __repr__(self):
        # one placeholder per argument: previously the global labels were printed as
        # "sections", the section names as "base" and the real base was dropped
        return "{}(name={},globals={},sections={},base={})".format(
            self.__class__.__name__, self.name, self.global_labels,
            [s.name for s in self.sections], self.base
        )

    @property
    def entrypoint(self):
        """
        The address execution should start at: the ``_start`` label, else the ``main``
        label, else the base of the first executable ``.text`` section, else None.
        """
        labels = self.context.labels
        for symbol in ('_start', 'main'):
            if symbol in labels:
                return labels.get(symbol)
        for sec in self.sections:
            if get_section_base_name(sec.name) == '.text' and sec.flags.executable:
                return sec.base
        return None

    def loaded_trigger(self, at_addr: T_AbsoluteAddress):
        """
        This trigger is called when the binary is loaded and its final address in memory is determined

        This will do a small sanity check to prevent programs loading twice, or at addresses they don't
        expect to be loaded.

        Then it will finalize all relative symbols defined in it to point to the correct addresses.

        :param at_addr: the address where the program will be located
        :raises RuntimeError: if the program is marked as loaded and at_addr differs from its base
        """
        # NOTE(review): is_loaded is never set in this method, presumably the caller sets it -- confirm
        if self.is_loaded:
            if at_addr != self.base:
                raise RuntimeError("Program loaded twice at different addresses! This will probably break things!")
            return

        if self.base is not None and self.base != at_addr:
            print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, '
                            'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE)

        # check if we are relocating
        if self.base != at_addr:
            # relocatable programs (base None) use offsets from 0
            offset = at_addr if self.base is None else at_addr - self.base

            # move all sections by the offset
            for sec in self.sections:
                sec.base += offset

            # move all relative symbols by the offset
            for name in self.relative_labels:
                self.context.labels[name] += offset

            self.base = at_addr
            self.context.base_address = at_addr

@ -0,0 +1,58 @@
import os
from abc import abstractmethod, ABC
from typing import Union, Iterator, List
from . import T_ParserOpts, Program
class ProgramLoader(ABC):
    """
    A program loader is always specific to a given source file. It is a place to store all state
    concerning the parsing and loading of that specific source file, including options.
    """

    def __init__(self, source_path: str, options: T_ParserOpts):
        """
        :param source_path: the path to the source file
        :param options: the parser options for this file
        """
        self.source_path = source_path
        self.options = options
        # last path component of the source path
        self.filename = os.path.split(self.source_path)[-1]

    @classmethod
    @abstractmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Return confidence that the file located at source_path
        should be parsed and loaded by this loader

        :param source_path: the path of the source file
        :return: the confidence that this file belongs to this parser
        """
        pass

    # typing.List is used for argv instead of the builtin generic ``list[str]``, which
    # fails at definition time on python versions older than 3.9
    # NOTE(review): the return annotation is a list literal, probably meant as a Tuple -- confirm
    @classmethod
    @abstractmethod
    def get_options(cls, argv: List[str]) -> [List[str], T_ParserOpts]:
        """
        parse command line args into an options dictionary

        :param argv: the command line args list
        :return: all remaining command line args and the parser options object
        """
        pass

    @classmethod
    def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader':
        """
        Instantiate a loader for the given source file with the required arguments

        :param source_path: the path to the source file
        :param options: the parsed options (guaranteed to come from this classes get_options method.
        :return: An instance of a ProgramLoader for the specified source
        """
        return cls(source_path, options)

    @abstractmethod
    def parse(self) -> Union[Program, Iterator[Program]]:
        """
        Parse the source file this loader was created for

        :return: a single program, or an iterator over programs
        """
        pass

@ -0,0 +1,26 @@
from typing import Union, Tuple
from . import Instruction, T_RelativeAddress, InstructionContext
from ..helpers import parse_numeric_argument
class SimpleInstruction(Instruction):
    """
    A basic Instruction implementation that keeps its arguments as strings and
    resolves immediates through its instruction context.
    """

    def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]],
                 context: InstructionContext, addr: T_RelativeAddress):
        """
        :param name: the name of the instruction
        :param args: the (zero to three) arguments of the instruction, as strings
        :param context: the context used to resolve labels
        :param addr: the address of the instruction, passed on when resolving labels
        """
        self.name = name
        self.args = args
        self.addr = addr
        self.context = context

    def get_imm(self, num: int) -> int:
        """
        Get argument ``num`` as an immediate value.

        The argument is first looked up as a label in the context; only if that
        yields None is it parsed as a numeric literal.

        :param num: index of the argument
        """
        target = self.context.resolve_label(self.args[num], self.addr)
        if target is not None:
            return target
        return parse_numeric_argument(self.args[num])

    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        Get an (immediate, register) pair: the register is argument ``num``,
        the immediate is argument ``num + 1``.

        :param num: index of the register argument
        """
        immediate = self.get_imm(num + 1)
        register = self.get_reg(num)
        return immediate, register

    def get_reg(self, num: int) -> str:
        """
        Get argument ``num`` unchanged, as a register name.

        :param num: index of the argument
        """
        return self.args[num]

@ -23,7 +23,7 @@ setuptools.setup(
"Operating System :: OS Independent",
],
package_dir={"": "."},
packages=["riscemu", "riscemu.decoder", "riscemu.instructions", "riscemu.IO", "riscemu.priv"],
packages=["riscemu", "riscemu.decoder", "riscemu.instructions", "riscemu.IO", "riscemu.priv", "riscemu.types"],
python_requires=">=3.6",
install_requires=[
"pyelftools~=0.27"

Loading…
Cancel
Save