[restructured] moved all simple type definitions into riscemu.types
parent
254410e9cc
commit
bc26ed3a02
@ -1,82 +0,0 @@
|
|||||||
"""
|
|
||||||
This file contains a base implementation of Instruction, and MemorySection.
|
|
||||||
|
|
||||||
This aims to be a simple base, usable for everyone who needs the basic functionality, but doesn't
|
|
||||||
want to set up their own subtypes of Instruction and MemorySection
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import List, Tuple, Union
|
|
||||||
from .exceptions import MemoryAccessException
|
|
||||||
from .helpers import parse_numeric_argument
|
|
||||||
from .types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \
|
|
||||||
T_AbsoluteAddress, Program
|
|
||||||
|
|
||||||
|
|
||||||
class SimpleInstruction(Instruction):
    """
    A plain Instruction implementation that keeps its arguments as strings and
    resolves them lazily against an InstructionContext.
    """

    def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]],
                 context: InstructionContext, addr: T_RelativeAddress):
        """
        :param name: the instruction name
        :param args: the (up to three) textual arguments of the instruction
        :param context: the context used to resolve labels in arguments
        :param addr: the address of this instruction, passed on to label resolution
        """
        self.name = name
        self.args = args
        self.context = context
        self.addr = addr

    def get_imm(self, num: int) -> int:
        """
        Return argument ``num`` as an immediate value.

        The argument is first looked up as a label; only if no label matches is
        it parsed as a numeric literal.
        """
        raw = self.args[num]
        target = self.context.resolve_label(raw, self.addr)
        return target if target is not None else parse_numeric_argument(raw)

    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        Return the pair (immediate taken from argument ``num + 1``, register taken
        from argument ``num``).
        """
        immediate = self.get_imm(num + 1)
        register = self.get_reg(num)
        return immediate, register

    def get_reg(self, num: int) -> str:
        """
        Return argument ``num`` unchanged, as a register name.
        """
        return self.args[num]
|
|
||||||
|
|
||||||
|
|
||||||
class InstructionMemorySection(MemorySection):
    """
    A memory section holding a list of already parsed instructions.

    Each instruction occupies four bytes of address space. The section cannot be
    accessed as raw bytes, only through read_ins.
    """

    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0):
        """
        :param instructions: the instructions contained in this section
        :param name: the name of the section
        :param context: the instruction context the instructions belong to
        :param owner: the owner of this section
        :param base: the base address of the section
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(instructions) * 4
        self.flags = MemoryFlags(True, True)
        self.instructions = instructions
        self.owner = owner

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Always fails, instructions have no byte representation in this section.

        :raises MemoryAccessException: always
        """
        raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Always fails, instructions cannot be overwritten with raw bytes.

        :raises MemoryAccessException: always
        """
        raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write')

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Fetch the instruction located at the given section-relative offset.

        :param offset: offset into this section, must be a multiple of four
        :return: the instruction at that offset
        :raises MemoryAccessException: if the offset is unaligned or outside of the section
        """
        if offset % 4 != 0:
            raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')
        index = offset // 4
        # a negative index would silently wrap around to the end of the list, and an
        # index past the end would surface as a bare IndexError
        if not 0 <= index < len(self.instructions):
            raise MemoryAccessException("Out of bounds instruction fetch in {}".format(self),
                                        self.base + offset, 4, 'instruction fetch')
        return self.instructions[index]
|
|
||||||
|
|
||||||
|
|
||||||
class BinaryDataMemorySection(MemorySection):
    """
    A memory section backed by a bytearray.

    It supports raw reads and writes but never yields instructions.
    """

    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0, flags: MemoryFlags = None):
        """
        :param data: the backing buffer, its length determines the section size
        :param name: the name of the section
        :param context: the instruction context this section belongs to
        :param owner: the owner of this section
        :param base: the base address of the section
        :param flags: access flags, defaults to MemoryFlags(False, False)
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(data)
        self.flags = flags if flags is not None else MemoryFlags(False, False)
        self.data = data
        self.owner = owner

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Read ``size`` bytes starting at the section-relative ``offset``.

        :return: a copy of the requested bytes
        :raises MemoryAccessException: if the range is not fully inside the section
        """
        # a negative offset would be interpreted by slicing as "from the end"
        if offset < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read')
        return self.data[offset:offset + size]

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Write the first ``size`` bytes of ``data`` at the section-relative ``offset``.

        :raises MemoryAccessException: if the range is not fully inside the section, or
                                       if ``data`` holds fewer than ``size`` bytes
        """
        # a negative offset would turn the slice assignment below into an insertion
        # that changes the length of the backing buffer
        if offset < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write')
        if len(data[0:size]) != size:
            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
        self.data[offset:offset + size] = data[0:size]

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Always fails, this section holds no instructions.

        :raises MemoryAccessException: always
        """
        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
                                    offset, 4, 'instruction fetch')
|
|
@ -1,656 +0,0 @@
|
|||||||
"""
|
|
||||||
RiscEmu (c) 2021 Anton Lydike
|
|
||||||
|
|
||||||
SPDX-License-Identifier: MIT
|
|
||||||
|
|
||||||
This file contains abstract base classes and types, bundling only the absolute basic functionality
|
|
||||||
|
|
||||||
See base.py for some basic implementations of these classes
|
|
||||||
"""
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import typing
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from collections import defaultdict
|
|
||||||
from ctypes import c_uint32, c_int32
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Dict, List, Optional, Tuple, Set, Union, Iterator, Callable, Type
|
|
||||||
|
|
||||||
from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_RED, FMT_BOLD
|
|
||||||
from .config import RunConfig
|
|
||||||
from .exceptions import ParseException
|
|
||||||
from .helpers import format_bytes, get_section_base_name
|
|
||||||
from .registers import Registers
|
|
||||||
|
|
||||||
if typing.TYPE_CHECKING:
|
|
||||||
from .MMU import MMU
|
|
||||||
from .instructions.instruction_set import InstructionSet
|
|
||||||
|
|
||||||
# define some base type aliases so we can keep track of absolute and relative addresses
T_RelativeAddress = int
T_AbsoluteAddress = int

# parser options are just dictionaries with arbitrary values
# (typing.Any is the "arbitrary value" type, the builtin any() is a function)
T_ParserOpts = Dict[str, typing.Any]

# matches numbered label references: a number followed by a direction, 'f' or 'b'
NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
|
|
||||||
|
|
||||||
|
|
||||||
class Int32:
    """
    A 32 bit signed integer with wrap-around semantics.

    The value is stored in a ctypes integer (see ``_type``), so every result is
    truncated to 32 bits. Binary operators accept plain ints as well as other
    Int32 instances and return a new instance of this object's class.
    """
    _type = c_int32
    __slots__ = ('_val',)

    def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0):
        """
        :param val: the initial value, bytes and bytearrays are decoded as signed
                    little endian integers
        :raises RuntimeError: if val has an unsupported type
        """
        if isinstance(val, (bytes, bytearray)):
            self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True))
        elif isinstance(val, self.__class__._type):
            # a ctypes value of the matching type is stored as is, without copying
            self._val = val
        elif isinstance(val, (c_uint32, c_int32, Int32)):
            self._val = self.__class__._type(val.value)
        elif isinstance(val, int):
            self._val = self.__class__._type(val)
        else:
            raise RuntimeError(
                "Unknown {} input type: {} ({})".format(self.__class__.__name__, type(val), val)
            )

    @staticmethod
    def _unwrap(other):
        """
        Return the plain value of an Int32 operand, any other operand is returned unchanged.
        """
        if isinstance(other, Int32):
            return other.value
        return other

    def __add__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value + self._unwrap(other))

    def __sub__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value - self._unwrap(other))

    def __mul__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value * self._unwrap(other))

    def __truediv__(self, other):
        # there are no fractional values, so true division is floor division
        return self // other

    def __floordiv__(self, other):
        return self.__class__(self.value // self._unwrap(other))

    def __mod__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value % self._unwrap(other))

    def __and__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value & self._unwrap(other))

    def __or__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value | self._unwrap(other))

    def __xor__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value ^ self._unwrap(other))

    def __lshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value << self._unwrap(other))

    def __rshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value >> self._unwrap(other))

    def __eq__(self, other: Union['Int32', int]):
        return self.value == self._unwrap(other)

    def __neg__(self):
        return self.__class__(-self._val.value)

    def __abs__(self):
        return self.__class__(abs(self.value))

    def __bytes__(self):
        # bytes(obj) rejects a __bytes__ result that is not a real bytes object,
        # so the bytearray produced by to_bytes must be converted
        return bytes(self.to_bytes(4))

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, self.value)

    def __str__(self):
        return str(self.value)

    def __format__(self, format_spec):
        return self.value.__format__(format_spec)

    def __hash__(self):
        # hashes like the plain integer it compares equal to
        return hash(self.value)

    def __gt__(self, other):
        return self.value > self._unwrap(other)

    def __lt__(self, other):
        return self.value < self._unwrap(other)

    def __le__(self, other):
        return self.value <= self._unwrap(other)

    def __ge__(self, other):
        return self.value >= self._unwrap(other)

    def __bool__(self):
        return bool(self.value)

    def __cmp__(self, other):
        """
        Three way comparison, returns -1, 0 or 1.

        Python 3 never calls this implicitly and int has no __cmp__ to delegate
        to, so the result is computed from the ordinary comparisons.
        """
        other = self._unwrap(other)
        return (self.value > other) - (self.value < other)

    # right handed binary operators

    def __radd__(self, other):
        return self + other

    def __rsub__(self, other):
        return self.__class__(other) - self

    def __rmul__(self, other):
        return self * other

    def __rtruediv__(self, other):
        return self.__class__(other) // self

    def __rfloordiv__(self, other):
        return self.__class__(other) // self

    def __rmod__(self, other):
        return self.__class__(other) % self

    def __rand__(self, other):
        return self.__class__(other) & self

    def __ror__(self, other):
        return self.__class__(other) | self

    def __rxor__(self, other):
        return self.__class__(other) ^ self

    @property
    def value(self):
        """
        The value as a plain python int.
        """
        return self._val.value

    def unsigned(self) -> 'UInt32':
        """
        Convert to an UInt32.
        """
        return UInt32(self)

    def to_bytes(self, bytes: int = 4) -> bytearray:
        """
        Encode the unsigned value as little endian.

        :param bytes: the number of bytes to produce
        :return: a bytearray of that length
        """
        return bytearray(self.unsigned_value.to_bytes(bytes, 'little'))

    def signed(self) -> 'Int32':
        """
        Convert to an Int32, an object that is exactly an Int32 is returned as is.
        """
        if self.__class__ == Int32:
            return self
        return Int32(self)

    @property
    def unsigned_value(self):
        """
        The value reinterpreted as an unsigned 32 bit integer.
        """
        return c_uint32(self.value).value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Shift right without sign extension.

        :param ammount: the number of bits to shift by
        """
        ammount = self._unwrap(ammount)
        # the modulo maps negative values to their unsigned 32 bit pattern first
        return self.__class__((self.value % 0x100000000) >> ammount)

    def __int__(self):
        return self.value

    def __hex__(self):
        # kept for explicit callers, hex() does not use this hook in Python 3
        return hex(self.value)
|
|
||||||
|
|
||||||
|
|
||||||
class UInt32(Int32):
    """
    The unsigned variant of Int32, its value is stored in a c_uint32.
    """
    _type = c_uint32

    def unsigned(self) -> 'UInt32':
        """
        Return this very object, no conversion is performed.
        """
        return self

    @property
    def unsigned_value(self):
        """
        The stored value as a plain python int.
        """
        stored = self._val
        return stored.value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Delegate to the regular right shift operator.

        :param ammount: the number of bits to shift by
        """
        shifted = self >> ammount
        return shifted
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class MemoryFlags:
    """
    Immutable pair of access flags of a memory section.
    """
    read_only: bool
    executable: bool

    def __repr__(self):
        """
        Render the flags as three characters: 'r', then 'w' or '-', then 'x' or '-'.
        """
        write_char = '-' if self.read_only else 'w'
        exec_char = 'x' if self.executable else '-'
        return "r{}{}".format(write_char, exec_char)
|
|
||||||
|
|
||||||
|
|
||||||
class InstructionContext:
    """
    Holds the label tables needed to resolve symbols used by instructions.
    """

    # The address where the instruction block is placed
    base_address: T_AbsoluteAddress

    # This dictionary maps all labels to their relative position of the instruction block
    labels: Dict[str, T_RelativeAddress]

    # This dictionary maps numbered labels (which can occur multiple times) to a list of
    # (block-relative) addresses where the label was placed
    numbered_labels: Dict[str, List[T_RelativeAddress]]

    # A reference to the MMU's global symbol dictionary for access to global symbols
    global_symbol_dict: Dict[str, T_AbsoluteAddress]

    def __init__(self):
        self.labels = dict()
        self.numbered_labels = defaultdict(list)
        self.base_address = 0
        self.global_symbol_dict = dict()

    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]:
        """
        Look up the address a symbol refers to.

        Numbered symbols (a number followed by 'f' or 'b') are resolved relative to
        ``address_at``: 'b' picks the closest placement before it, everything else
        the closest placement after it. All other symbols are looked up in the
        local labels first and in the global symbols second.

        :param symbol: the symbol to resolve
        :param address_at: the block-relative address the symbol is referenced from
        :return: the resolved address, or None if the symbol is unknown
        :raises ParseException: if a numbered symbol is given without an address
        """
        if not NUMBER_SYMBOL_PATTERN.match(symbol):
            # local symbols take precedence, the globals are only a fallback
            if symbol in self.labels:
                return self.labels.get(symbol, None)
            return self.global_symbol_dict.get(symbol, None)

        if address_at is None:
            raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))

        number, direction = symbol[:-1], symbol[-1]
        placements = self.numbered_labels.get(number, [])

        if direction == 'b':
            candidates = (addr + self.base_address for addr in placements if addr < address_at)
            return max(candidates, default=None)

        candidates = (addr + self.base_address for addr in placements if addr > address_at)
        return min(candidates, default=None)
|
|
||||||
|
|
||||||
|
|
||||||
class Instruction(ABC):
    """
    Abstract base of all instructions: a name plus a tuple of arguments.
    """
    name: str
    args: tuple

    @abstractmethod
    def get_imm(self, num: int) -> int:
        """
        parse and get immediate argument
        """

    @abstractmethod
    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        parse and get an argument imm(reg)
        """

    @abstractmethod
    def get_reg(self, num: int) -> str:
        """
        parse and get an register argument
        """

    def __repr__(self):
        """
        Render as the name followed by the comma separated arguments.
        """
        joined_args = ", ".join(self.args)
        return "{} {}".format(self.name, joined_args)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class MemorySection(ABC):
    """
    Abstract base of all memory sections.

    Subclasses implement raw byte access (read / write) and instruction
    fetching (read_ins), this class adds dumping and a common representation.
    """
    name: str
    flags: MemoryFlags
    size: int
    base: T_AbsoluteAddress
    owner: str
    context: InstructionContext

    @property
    def end(self):
        """
        The first address after this section (base + size).
        """
        return self.base + self.size

    @abstractmethod
    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Read ``size`` bytes starting at the section-relative ``offset``.
        """
        pass

    @abstractmethod
    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Write ``size`` bytes of ``data`` at the section-relative ``offset``.
        """
        pass

    @abstractmethod
    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Fetch the instruction at the section-relative ``offset``.
        """
        pass

    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
        """
        Print the contents of this section to stdout.

        Executable sections are printed one instruction per line, all other
        sections as rows of formatted bytes.

        :param start: section-relative offset to start at; if ``end`` is None this is
                      instead the offset to highlight, and a window of about ``rows``
                      rows around it is printed
        :param end: section-relative offset to stop at (exclusive)
        :param fmt: the byte format handed to format_bytes
        :param bytes_per_row: bytes printed per row (forced to 4 for executable sections)
        :param rows: number of rows of the window printed when ``end`` is None
        :param group: group size handed to format_bytes
        """
        if self.flags.executable:
            bytes_per_row = 4
        highlight = None
        if end is None:
            # NOTE(review): size - 1 is used as an exclusive end here, so the last
            # byte of the section is never part of the window - confirm intent
            end = min(start + (bytes_per_row * (rows // 2)), self.size - 1)
            highlight = start
            start = max(0, start - (bytes_per_row * (rows // 2)))

        if self.flags.executable:
            print(FMT_MEM + "{}, viewing {} instructions:".format(
                self, (end - start) // 4
            ) + FMT_NONE)

            for addr in range(start, end, 4):
                if addr == highlight:
                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
                print("0x{:04x}: {}{}".format(
                    self.base + addr, self.read_ins(addr), FMT_NONE
                ))
        else:
            print(FMT_MEM + "{}, viewing {} bytes:".format(
                self, (end - start)
            ) + FMT_NONE)

            # walk the range row by row and shorten the last row so that it stops
            # exactly at end; stepping full rows from a start that is not a multiple
            # of bytes_per_row would otherwise print bytes twice or read past end
            for addr in range(start, end, bytes_per_row):
                row_length = min(bytes_per_row, end - addr)
                hi_ind = (highlight - addr) // group if highlight is not None else -1
                print("0x{:04x}: {}{}".format(
                    self.base + addr, format_bytes(self.read(addr, row_length), fmt, group, hi_ind), FMT_NONE
                ))

    def dump_all(self, *args, **kwargs):
        """
        Dump the whole section, extra arguments are passed on to dump.
        """
        self.dump(0, self.size, *args, **kwargs)

    def __repr__(self):
        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
            self.__class__.__name__,
            self.name,
            self.base,
            self.size,
            self.flags,
            self.owner
        )
|
|
||||||
|
|
||||||
|
|
||||||
class Program:
    """
    This represents a collection of sections which together form an executable program

    When you want to create a program which can be located anywhere in memory, set base to None,
    this signals the other components, that this is relocatable. Set the base of each section to
    the offset in the program, and everything will be taken care of for you.

    """
    name: str
    context: InstructionContext
    global_labels: Set[str]
    relative_labels: Set[str]
    sections: List[MemorySection]
    base: Optional[T_AbsoluteAddress]
    is_loaded: bool

    @property
    def size(self):
        """
        The distance from the program base to the end of the section with the highest base.
        """
        if len(self.sections) == 0:
            return 0
        # sections are kept sorted by base, so the last one ends the program
        last = self.sections[-1]
        if self.base is None:
            return last.base + last.size
        return (last.base - self.base) + last.size

    def __init__(self, name: str, base: Optional[int] = None):
        """
        :param name: the name of the program
        :param base: the address the program expects to be loaded at, None if relocatable
        """
        self.name = name
        self.context = InstructionContext()
        self.sections = []
        self.global_labels = set()
        self.relative_labels = set()
        self.base = base
        self.is_loaded = False

    def add_section(self, sec: MemorySection):
        """
        Add a section to the program, keeping the section list ordered by base address.

        :param sec: the section to add
        """
        # print a warning when a section is located before the programs base
        if self.base is not None:
            if sec.base < self.base:
                print(
                    FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format(
                        sec, self.name, self.base
                    ) + FMT_NONE)

        self.sections.append(sec)
        # keep section list ordered
        self.sections.sort(key=lambda section: section.base)

    def __repr__(self):
        # one placeholder per argument: the format string used to lack a slot for the
        # global labels, which shifted every later value and dropped the base
        return "{}(name={},globals={},sections={},base={})".format(
            self.__class__.__name__, self.name, self.global_labels,
            [s.name for s in self.sections], self.base
        )

    @property
    def entrypoint(self):
        """
        The address execution should start at.

        This is the '_start' label, else the 'main' label, else the base of the first
        executable '.text' section, else None.
        """
        if '_start' in self.context.labels:
            return self.context.labels.get('_start')
        if 'main' in self.context.labels:
            return self.context.labels.get('main')
        for sec in self.sections:
            if get_section_base_name(sec.name) == '.text' and sec.flags.executable:
                return sec.base
        return None

    def loaded_trigger(self, at_addr: T_AbsoluteAddress):
        """
        This trigger is called when the binary is loaded and its final address in memory is determined

        This will do a small sanity check to prevent programs loading twice, or at addresses they don't
        expect to be loaded.

        Then it will finalize all relative symbols defined in it to point to the correct addresses.

        :param at_addr: the address where the program will be located
        :raises RuntimeError: if the program is already marked as loaded at another address
        """
        # NOTE(review): is_loaded is never set in this class, presumably the caller
        # is expected to set it after loading - confirm
        if self.is_loaded:
            if at_addr != self.base:
                raise RuntimeError("Program loaded twice at different addresses! This will probably break things!")
            return

        if self.base is not None and self.base != at_addr:
            print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, '
                            'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE)

        # check if we are relocating
        if self.base != at_addr:
            offset = at_addr if self.base is None else at_addr - self.base

            # move all sections by the offset
            for sec in self.sections:
                sec.base += offset

            # move all relative symbols by the offset
            for name in self.relative_labels:
                self.context.labels[name] += offset

            self.base = at_addr
            self.context.base_address = at_addr
|
|
||||||
|
|
||||||
|
|
||||||
class ProgramLoader(ABC):
    """
    A program loader is always specific to a given source file. It is a place to store all state
    concerning the parsing and loading of that specific source file, including options.
    """

    def __init__(self, source_path: str, options: T_ParserOpts):
        """
        :param source_path: the path to the source file
        :param options: the parser options for this file
        """
        self.source_path = source_path
        self.options = options
        # the last path component of the source path
        self.filename = os.path.split(self.source_path)[-1]

    @classmethod
    @abstractmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Return confidence that the file located at source_path
        should be parsed and loaded by this loader
        :param source_path: the path of the source file
        :return: the confidence that this file belongs to this parser
        """
        pass

    @classmethod
    @abstractmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
        """
        parse command line args into an options dictionary

        :param argv: the command line args list
        :return: all remaining command line args and the parser options object
        """
        pass

    @classmethod
    def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader':
        """
        Instantiate a loader for the given source file with the required arguments

        :param source_path: the path to the source file
        :param options: the parsed options (guaranteed to come from this classes get_options method.
        :return: An instance of a ProgramLoader for the specified source
        """
        return cls(source_path, options)

    @abstractmethod
    def parse(self) -> Union[Program, Iterator[Program]]:
        """
        Parse the source file this loader was created for

        :return: a single Program, or an iterator over Programs
        """
        pass
|
|
||||||
|
|
||||||
|
|
||||||
class CPU(ABC):
    """
    Abstract base of all CPUs: it owns the registers, the instruction table and
    the bookkeeping state, stepping and running are left to subclasses.
    """
    # static cpu configuration
    INS_XLEN: int = 4

    # housekeeping variables
    regs: Registers
    mmu: 'MMU'
    pc: T_AbsoluteAddress
    cycle: int
    halted: bool

    # debugging context
    debugger_active: bool

    # instruction information
    instructions: Dict[str, Callable[[Instruction], None]]
    instruction_sets: Set['InstructionSet']

    # configuration
    conf: RunConfig

    def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig):
        """
        :param mmu: the memory management unit used by this cpu
        :param instruction_sets: the instruction set classes to instantiate and load
        :param conf: the run configuration
        """
        self.mmu = mmu
        self.regs = Registers()
        self.conf = conf

        self.instruction_sets = set()
        self.instructions = dict()

        # every instruction set is created for this cpu and contributes its instructions
        for ins_set_type in instruction_sets:
            loaded_set = ins_set_type(self)
            self.instructions.update(loaded_set.load())
            self.instruction_sets.add(loaded_set)

        self.halted = False
        self.cycle = 0
        self.pc = 0
        self.debugger_active = False

    def run_instruction(self, ins: Instruction):
        """
        Execute a single instruction

        :param ins: The instruction to execute
        :raises RuntimeError: if no loaded instruction set knows the instruction
        """
        if ins.name not in self.instructions:
            # this should never be reached, as unknown instructions are unparseable
            raise RuntimeError("Unknown instruction: {}".format(ins))
        handler = self.instructions[ins.name]
        handler(ins)

    def load_program(self, program: Program):
        """
        Hand the program to the mmu for loading.
        """
        self.mmu.load_program(program)

    def __repr__(self):
        """
        Returns a representation of the CPU and some of its state.
        """
        set_names = " ".join(s.name for s in self.instruction_sets)
        return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format(
            self.__class__.__name__, self.pc, self.cycle, self.halted, set_names
        )

    @abstractmethod
    def step(self, verbose=False):
        pass

    @abstractmethod
    def run(self, verbose=False):
        pass

    def launch(self, program: Program, verbose: bool = False):
        """
        Set the program counter to the entrypoint of a loaded program and run it.

        Programs unknown to the mmu are not started, an error is printed instead.
        """
        if program in self.mmu.programs:
            self.pc = program.entrypoint
            self.run(verbose)
            return

        print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE)

    @classmethod
    @abstractmethod
    def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
        pass

    def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]:
        """
        Return the loader class reporting the highest confidence for the file.
        """
        def confidence(loader):
            return loader.can_parse(file_name)

        return max(self.get_loaders(), key=confidence)

    @property
    def sections(self):
        return self.mmu.sections

    @property
    def programs(self):
        return self.mmu.programs
|
|
@ -0,0 +1,26 @@
|
|||||||
|
from typing import Any, Dict
import re

# define some base type aliases so we can keep track of absolute and relative addresses
T_RelativeAddress = int
T_AbsoluteAddress = int

# parser options are just dictionaries with arbitrary values
# (typing.Any is the "arbitrary value" type, the builtin any() is a function)
T_ParserOpts = Dict[str, Any]

# matches numbered label references: a number followed by a direction, 'f' or 'b'
NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
|
||||||
|
|
||||||
|
from .flags import MemoryFlags
|
||||||
|
from .int32 import UInt32, Int32
|
||||||
|
from .instruction import Instruction
|
||||||
|
from .instruction_context import InstructionContext
|
||||||
|
from .memory_section import MemorySection
|
||||||
|
from .program import Program
|
||||||
|
from .program_loader import ProgramLoader
|
||||||
|
from .cpu import CPU
|
||||||
|
from .simple_instruction import SimpleInstruction
|
||||||
|
from .instruction_memory_section import InstructionMemorySection
|
||||||
|
from .binary_data_memory_section import BinaryDataMemorySection
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,29 @@
|
|||||||
|
from . import MemorySection, InstructionContext, MemoryFlags, T_RelativeAddress, Instruction
|
||||||
|
from ..exceptions import MemoryAccessException
|
||||||
|
|
||||||
|
|
||||||
|
class BinaryDataMemorySection(MemorySection):
    """
    A memory section backed by a bytearray.

    It supports raw reads and writes but never yields instructions.
    """

    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0, flags: MemoryFlags = None):
        """
        :param data: the backing buffer, its length determines the section size
        :param name: the name of the section
        :param context: the instruction context this section belongs to
        :param owner: the owner of this section
        :param base: the base address of the section
        :param flags: access flags, defaults to MemoryFlags(False, False)
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(data)
        self.flags = flags if flags is not None else MemoryFlags(False, False)
        self.data = data
        self.owner = owner

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Read ``size`` bytes starting at the section-relative ``offset``.

        :return: a copy of the requested bytes
        :raises MemoryAccessException: if the range is not fully inside the section
        """
        # a negative offset would be interpreted by slicing as "from the end"
        if offset < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read')
        return self.data[offset:offset + size]

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Write the first ``size`` bytes of ``data`` at the section-relative ``offset``.

        :raises MemoryAccessException: if the range is not fully inside the section, or
                                       if ``data`` holds fewer than ``size`` bytes
        """
        # a negative offset would turn the slice assignment below into an insertion
        # that changes the length of the backing buffer
        if offset < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write')
        if len(data[0:size]) != size:
            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
        self.data[offset:offset + size] = data[0:size]

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Always fails, this section holds no instructions.

        :raises MemoryAccessException: always
        """
        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
                                    offset, 4, 'instruction fetch')
|
@ -0,0 +1,107 @@
|
|||||||
|
import typing
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import List, Type, Callable, Set, Dict
|
||||||
|
|
||||||
|
from ..registers import Registers
|
||||||
|
from ..config import RunConfig
|
||||||
|
from ..colors import FMT_RED, FMT_NONE
|
||||||
|
from . import T_AbsoluteAddress, Instruction, Program, ProgramLoader
|
||||||
|
|
||||||
|
|
||||||
|
class CPU(ABC):
    """
    Abstract base of all CPUs: it owns the registers, the instruction table and
    the bookkeeping state, stepping and running are left to subclasses.
    """
    # static cpu configuration
    INS_XLEN: int = 4

    # housekeeping variables
    regs: Registers
    mmu: 'MMU'
    pc: T_AbsoluteAddress
    cycle: int
    halted: bool

    # debugging context
    debugger_active: bool

    # instruction information
    instructions: Dict[str, Callable[[Instruction], None]]
    instruction_sets: Set['InstructionSet']

    # configuration
    conf: RunConfig

    def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig):
        """
        :param mmu: the memory management unit used by this cpu
        :param instruction_sets: the instruction set classes to instantiate and load
        :param conf: the run configuration
        """
        self.mmu = mmu
        self.regs = Registers()
        self.conf = conf

        self.instruction_sets = set()
        self.instructions = dict()

        # every instruction set is created for this cpu and contributes its instructions
        for ins_set_type in instruction_sets:
            loaded_set = ins_set_type(self)
            self.instructions.update(loaded_set.load())
            self.instruction_sets.add(loaded_set)

        self.halted = False
        self.cycle = 0
        self.pc = 0
        self.debugger_active = False

    def run_instruction(self, ins: Instruction):
        """
        Execute a single instruction

        :param ins: The instruction to execute
        :raises RuntimeError: if no loaded instruction set knows the instruction
        """
        if ins.name not in self.instructions:
            # this should never be reached, as unknown instructions are unparseable
            raise RuntimeError("Unknown instruction: {}".format(ins))
        handler = self.instructions[ins.name]
        handler(ins)

    def load_program(self, program: Program):
        """
        Hand the program to the mmu for loading.
        """
        self.mmu.load_program(program)

    def __repr__(self):
        """
        Returns a representation of the CPU and some of its state.
        """
        set_names = " ".join(s.name for s in self.instruction_sets)
        return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format(
            self.__class__.__name__, self.pc, self.cycle, self.halted, set_names
        )

    @abstractmethod
    def step(self, verbose=False):
        pass

    @abstractmethod
    def run(self, verbose=False):
        pass

    def launch(self, program: Program, verbose: bool = False):
        """
        Set the program counter to the entrypoint of a loaded program and run it.

        Programs unknown to the mmu are not started, an error is printed instead.
        """
        if program in self.mmu.programs:
            self.pc = program.entrypoint
            self.run(verbose)
            return

        print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE)

    @classmethod
    @abstractmethod
    def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
        pass

    def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]:
        """
        Return the loader class reporting the highest confidence for the file.
        """
        def confidence(loader):
            return loader.can_parse(file_name)

        return max(self.get_loaders(), key=confidence)

    @property
    def sections(self):
        return self.mmu.sections

    @property
    def programs(self):
        return self.mmu.programs
|
@ -0,0 +1,13 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class MemoryFlags:
    """
    Immutable pair of permission flags of a memory section.

    The representation is a three character string: a fixed ``r``, then ``w`` unless the
    section is read only, then ``x`` if it is executable (``-`` marks an absent permission).
    """
    read_only: bool
    executable: bool

    def __repr__(self):
        write_flag = '-' if self.read_only else 'w'
        exec_flag = 'x' if self.executable else '-'
        return "r{}{}".format(write_flag, exec_flag)
|
@ -0,0 +1,31 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
|
|
||||||
|
class Instruction(ABC):
    """
    Abstract interface of a single instruction: a name plus a tuple of arguments.

    Subclasses decide how the arguments are interpreted by implementing the three accessors.
    """
    name: str
    args: tuple

    @abstractmethod
    def get_imm(self, num: int) -> int:
        """
        parse and get immediate argument

        :param num: index of the argument
        """

    @abstractmethod
    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        parse and get an argument imm(reg)

        :param num: index of the argument
        """

    @abstractmethod
    def get_reg(self, num: int) -> str:
        """
        parse and get an register argument

        :param num: index of the argument
        """

    def __repr__(self):
        # the arguments are joined as strings: "<name> <arg>, <arg>, ..."
        joined_args = ", ".join(self.args)
        return "{} {}".format(self.name, joined_args)
|
@ -0,0 +1,53 @@
|
|||||||
|
from collections import defaultdict
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from ..exceptions import ParseException
|
||||||
|
from ..types import T_AbsoluteAddress, T_RelativeAddress, NUMBER_SYMBOL_PATTERN
|
||||||
|
|
||||||
|
|
||||||
|
class InstructionContext:
    """
    Symbol information of an instruction block: its base address, named labels, numbered labels
    and a dictionary of global symbols.
    """

    # The address where the instruction block is placed
    base_address: T_AbsoluteAddress

    # This dictionary maps all labels to their relative position of the instruction block
    labels: Dict[str, T_RelativeAddress]

    # This dictionary maps numbered labels (which can occur multiple times) to a list of
    # (block-relative) addresses where the label was placed
    numbered_labels: Dict[str, List[T_RelativeAddress]]

    # A reference to the MMU's global symbol dictionary for access to global symbols
    global_symbol_dict: Dict[str, T_AbsoluteAddress]

    def __init__(self):
        self.base_address = 0
        self.labels = {}
        self.numbered_labels = defaultdict(list)
        self.global_symbol_dict = {}

    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]:
        """
        Look up the address belonging to a symbol.

        Symbols matching NUMBER_SYMBOL_PATTERN are numbered labels: the last character is the
        direction, everything before it is the label. For direction ``b`` the highest occurrence
        below ``address_at`` is chosen, otherwise the lowest occurrence above it; the result has
        ``base_address`` added. All other symbols are looked up in ``labels`` first and in
        ``global_symbol_dict`` second.

        :param symbol: the symbol to resolve
        :param address_at: block-relative address the lookup is performed from, required for
                           numbered labels
        :return: the resolved address, or None if the symbol is unknown
        :raises ParseException: if a numbered label is given without ``address_at``
        """
        if not NUMBER_SYMBOL_PATTERN.match(symbol):
            # named symbol: labels of this block take precedence over the globals
            if symbol in self.labels:
                return self.labels.get(symbol, None)
            return self.global_symbol_dict.get(symbol, None)

        if address_at is None:
            raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))

        label, direction = symbol[:-1], symbol[-1]
        occurrences = self.numbered_labels.get(label, [])

        if direction == 'b':
            before = (addr + self.base_address for addr in occurrences if addr < address_at)
            return max(before, default=None)

        after = (addr + self.base_address for addr in occurrences if addr > address_at)
        return min(after, default=None)
|
||||||
|
|
@ -0,0 +1,27 @@
|
|||||||
|
from typing import List
|
||||||
|
|
||||||
|
from . import MemorySection, Instruction, InstructionContext, MemoryFlags, T_RelativeAddress
|
||||||
|
from .. import MemoryAccessException
|
||||||
|
|
||||||
|
|
||||||
|
class InstructionMemorySection(MemorySection):
    """
    A memory section backed by a list of instructions instead of raw bytes.

    Every instruction occupies 4 bytes of the section. Raw reads and writes are refused, only
    instruction fetches are served.
    """

    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0):
        """
        :param instructions: the instructions stored in this section
        :param name: the section name
        :param context: the instruction context belonging to the section
        :param owner: name of the owner of the section
        :param base: the base address of the section
        """
        self.instructions = instructions
        self.name = name
        self.owner = owner
        self.base = base
        self.context = context
        # flags are created as MemoryFlags(read_only=True, executable=True)
        self.flags = MemoryFlags(True, True)
        self.size = len(instructions) * 4

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Raw reads are not supported.

        :raises MemoryAccessException: always
        """
        address = self.base + offset
        raise MemoryAccessException("Cannot read raw bytes from instruction section", address, size, 'read')

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Raw writes are not supported.

        :raises MemoryAccessException: always
        """
        address = self.base + offset
        raise MemoryAccessException("Cannot write raw bytes to instruction section", address, size, 'write')

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Fetch the instruction stored at the given offset.

        :param offset: section-relative offset, must be a multiple of 4
        :return: the instruction at index ``offset // 4``
        :raises MemoryAccessException: if the offset is not a multiple of 4
        """
        aligned = offset % 4 == 0
        if not aligned:
            raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')

        index = offset // 4
        return self.instructions[index]
|
||||||
|
|
@ -0,0 +1,202 @@
|
|||||||
|
from typing import Union
|
||||||
|
from ctypes import c_int32, c_uint32
|
||||||
|
|
||||||
|
|
||||||
|
class Int32:
    """
    A signed 32 bit integer with wrap-around arithmetic, backed by ``ctypes.c_int32``.

    Every operator returns a new instance of the same class as the left operand, built from the
    plain Python result; the ctypes constructor truncates that result to 32 bits. Operands may be
    other ``Int32`` instances or plain ints. Comparisons, ``hash`` and formatting work on the
    wrapped Python int.
    """
    _type = c_int32
    __slots__ = ('_val',)

    def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0):
        """
        :param val: the initial value. bytes/bytearray are decoded as a signed little endian
                    number; a ctypes value of this class's own ``_type`` is stored as is; other
                    ctypes values and Int32 instances are converted through their ``.value``.
        :raises RuntimeError: if ``val`` is of an unsupported type
        """
        ctype = self.__class__._type
        if isinstance(val, (bytes, bytearray)):
            self._val = ctype(int.from_bytes(val, 'little', signed=True))
        elif isinstance(val, ctype):
            self._val = val
        elif isinstance(val, (c_uint32, c_int32, Int32)):
            self._val = ctype(val.value)
        elif isinstance(val, int):
            self._val = ctype(val)
        else:
            raise RuntimeError(
                "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val)
            )

    @staticmethod
    def _unwrap(other):
        """Return the plain int wrapped by ``other`` if it is an Int32, else ``other`` itself."""
        return other.value if isinstance(other, Int32) else other

    # binary operators

    def __add__(self, other: Union['Int32', int]):
        return self.__class__(self.value + self._unwrap(other))

    def __sub__(self, other: Union['Int32', int]):
        return self.__class__(self.value - self._unwrap(other))

    def __mul__(self, other: Union['Int32', int]):
        return self.__class__(self.value * self._unwrap(other))

    def __truediv__(self, other):
        # there is no fractional result for an integer type, "/" behaves like "//"
        return self // other

    def __floordiv__(self, other):
        return self.__class__(self.value // self._unwrap(other))

    def __mod__(self, other: Union['Int32', int]):
        return self.__class__(self.value % self._unwrap(other))

    def __and__(self, other: Union['Int32', int]):
        return self.__class__(self.value & self._unwrap(other))

    def __or__(self, other: Union['Int32', int]):
        return self.__class__(self.value | self._unwrap(other))

    def __xor__(self, other: Union['Int32', int]):
        return self.__class__(self.value ^ self._unwrap(other))

    def __lshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value << self._unwrap(other))

    def __rshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value >> self._unwrap(other))

    # comparisons

    def __eq__(self, other: Union['Int32', int]):
        return self.value == self._unwrap(other)

    def __gt__(self, other):
        return self.value > self._unwrap(other)

    def __lt__(self, other):
        return self.value < self._unwrap(other)

    def __le__(self, other):
        return self.value <= self._unwrap(other)

    def __ge__(self, other):
        return self.value >= self._unwrap(other)

    def __cmp__(self, other):
        """
        Three way comparison for explicit callers.

        Python 3 ints have no ``__cmp__`` to delegate to, so the result is computed from the
        rich comparisons.

        :return: -1, 0 or 1 if this value is smaller than, equal to or greater than ``other``
        """
        other = self._unwrap(other)
        return (self.value > other) - (self.value < other)

    # unary operators and conversions

    def __neg__(self):
        return self.__class__(-self.value)

    def __abs__(self):
        return self.__class__(abs(self.value))

    def __bytes__(self):
        # bytes() rejects anything but a real bytes object from __bytes__, to_bytes() yields
        # a bytearray, so it has to be converted
        return bytes(self.to_bytes(4))

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, self.value)

    def __str__(self):
        return str(self.value)

    def __format__(self, format_spec):
        return self.value.__format__(format_spec)

    def __hash__(self):
        return hash(self.value)

    def __bool__(self):
        return bool(self.value)

    # right handed binary operators

    def __radd__(self, other):
        return self + other

    def __rsub__(self, other):
        return self.__class__(other) - self

    def __rmul__(self, other):
        return self * other

    def __rtruediv__(self, other):
        return self.__class__(other) // self

    def __rfloordiv__(self, other):
        return self.__class__(other) // self

    def __rmod__(self, other):
        return self.__class__(other) % self

    def __rand__(self, other):
        return self.__class__(other) & self

    def __ror__(self, other):
        return self.__class__(other) | self

    def __rxor__(self, other):
        return self.__class__(other) ^ self

    @property
    def value(self):
        """The wrapped value as a plain Python int."""
        return self._val.value

    def unsigned(self) -> 'UInt32':
        """Convert to a UInt32."""
        return UInt32(self)

    def to_bytes(self, bytes: int = 4) -> bytearray:
        """
        Encode the unsigned value in little endian byte order.

        :param bytes: the number of bytes to produce
        :return: the encoded value
        """
        return bytearray(self.unsigned_value.to_bytes(bytes, 'little'))

    def signed(self) -> 'Int32':
        """Return self if this is exactly an Int32, otherwise a new Int32 built from self."""
        if self.__class__ == Int32:
            return self
        return Int32(self)

    @property
    def unsigned_value(self):
        """The value reinterpreted as an unsigned 32 bit number (plain int)."""
        return c_uint32(self.value).value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Shift right on the value taken modulo 2**32, so zeros are shifted in from the left.

        :param ammount: the number of bits to shift by
        :return: the shifted value as an instance of this class
        """
        ammount = self._unwrap(ammount)
        return self.__class__((self.value % 0x100000000) >> ammount)

    def __int__(self):
        return self.value

    def __hex__(self):
        # Python 2 style hook: only reached when called explicitly
        return hex(self.value)
|
||||||
|
|
||||||
|
|
||||||
|
class UInt32(Int32):
    """
    Unsigned variant of Int32: the same interface, but values are stored in a ``ctypes.c_uint32``.
    """
    _type = c_uint32

    def unsigned(self) -> 'UInt32':
        """Already unsigned: return this very instance."""
        return self

    @property
    def unsigned_value(self):
        """The stored value as a plain Python int."""
        return self.value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Shift right by ``ammount`` bits using the regular ``>>`` operator.

        :param ammount: the number of bits to shift by
        :return: the shifted value
        """
        return self.__rshift__(ammount)
|
@ -0,0 +1,88 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from ..colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE
|
||||||
|
from ..helpers import format_bytes
|
||||||
|
from . import MemoryFlags, T_AbsoluteAddress, InstructionContext, T_RelativeAddress, Instruction
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MemorySection(ABC):
    """
    Abstract base of a contiguous memory region.

    Concrete sections implement raw access (``read``/``write``) and instruction fetches
    (``read_ins``); this base class contributes the pretty printer ``dump`` on top of them.
    """
    name: str
    flags: MemoryFlags
    size: int
    base: T_AbsoluteAddress
    owner: str
    context: InstructionContext

    @property
    def end(self):
        """The address directly after the section (``base + size``)."""
        return self.base + self.size

    @abstractmethod
    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Read raw bytes.

        :param offset: section-relative offset to start at
        :param size: number of bytes
        """

    @abstractmethod
    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Write raw bytes.

        :param offset: section-relative offset to start at
        :param size: number of bytes
        :param data: the bytes to write
        """

    @abstractmethod
    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Fetch an instruction.

        :param offset: section-relative offset of the instruction
        """

    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
        """
        Print the contents of the section to stdout.

        Executable sections are listed one instruction (4 bytes) per line, everything else as
        rows of bytes rendered by ``format_bytes``.

        :param start: section-relative offset to start at
        :param end: section-relative offset to stop at. When omitted, a window of ``rows // 2``
                    rows to either side of ``start`` is shown (limited to ``0`` and ``size - 1``)
                    and the original ``start`` is highlighted.
        :param fmt: format name passed to ``format_bytes``
        :param bytes_per_row: bytes per printed row (forced to 4 for executable sections)
        :param rows: number of rows of the window used when ``end`` is omitted
        :param group: group size passed to ``format_bytes``
        """
        is_code = self.flags.executable
        if is_code:
            bytes_per_row = 4

        highlight = None
        if end is None:
            half_window = bytes_per_row * (rows // 2)
            highlight = start
            end = min(start + half_window, self.size - 1)
            start = max(0, start - half_window)

        if is_code:
            header = "{}, viewing {} instructions:".format(self, (end - start) // 4)
            print(FMT_MEM + header + FMT_NONE)

            for addr in range(start, end, 4):
                if addr == highlight:
                    # no newline: the formatting applies to the line printed next
                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
                print("0x{:04x}: {}{}".format(self.base + addr, self.read_ins(addr), FMT_NONE))
            return

        header = "{}, viewing {} bytes:".format(self, (end - start))
        print(FMT_MEM + header + FMT_NONE)

        def highlight_index(row_start):
            # index of the highlighted group relative to the row, -1 if nothing is highlighted
            if highlight is None:
                return -1
            return (highlight - row_start) // group

        # full rows are printed up to the last multiple of bytes_per_row not exceeding end
        aligned_end = end - (end % bytes_per_row)

        for addr in range(start, aligned_end, bytes_per_row):
            hi_ind = highlight_index(addr)
            print("0x{:04x}: {}{}".format(
                self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE
            ))

        if aligned_end != end:
            # the remaining bytes form a shorter last row
            hi_ind = highlight_index(aligned_end)
            print("0x{:04x}: {}{}".format(
                self.base + aligned_end,
                format_bytes(self.read(aligned_end, end % bytes_per_row), fmt, group, hi_ind),
                FMT_NONE
            ))

    def dump_all(self, *args, **kwargs):
        """
        Dump the section from offset 0 to ``size``; further arguments are passed on to ``dump``.
        """
        self.dump(0, self.size, *args, **kwargs)

    def __repr__(self):
        template = "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})"
        return template.format(
            type(self).__name__, self.name, self.base, self.size, self.flags, self.owner
        )
|
@ -0,0 +1,104 @@
|
|||||||
|
from typing import List, Optional, Set
|
||||||
|
|
||||||
|
from ..colors import FMT_RED, FMT_BOLD, FMT_NONE, FMT_MEM
|
||||||
|
from ..helpers import get_section_base_name
|
||||||
|
from . import InstructionContext, T_AbsoluteAddress, MemorySection
|
||||||
|
|
||||||
|
|
||||||
|
class Program:
    """
    This represents a collection of sections which together form an executable program

    When you want to create a program which can be located anywhere in memory, set base to None,
    this signals the other components, that this is relocatable. Set the base of each section to
    the offset in the program, and everything will be taken care of for you.
    """
    name: str
    context: InstructionContext
    global_labels: Set[str]
    relative_labels: Set[str]
    sections: List[MemorySection]
    base: Optional[T_AbsoluteAddress]
    is_loaded: bool

    def __init__(self, name: str, base: Optional[int] = None):
        """
        :param name: the name of the program
        :param base: the address the program expects to be loaded at, None if it is relocatable
        """
        self.name = name
        self.context = InstructionContext()
        self.sections = []
        self.global_labels = set()
        self.relative_labels = set()
        self.base = base
        self.is_loaded = False

    @property
    def size(self):
        """
        The distance from the program base (0 if the base is None) to the end of the last section.

        add_section keeps the section list ordered by base, so the last entry is the section
        with the highest base.
        """
        if not self.sections:
            return 0
        last = self.sections[-1]
        if self.base is None:
            return last.base + last.size
        return (last.base - self.base) + last.size

    def add_section(self, sec: MemorySection):
        """
        Add a section to the program, keeping the section list sorted by base.

        A warning is printed if the section is placed before the program's base.

        :param sec: the section to add
        """
        # print a warning when a section is located before the programs base
        if self.base is not None and sec.base < self.base:
            print(
                FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format(
                    sec, self.name, self.base
                ) + FMT_NONE)

        self.sections.append(sec)
        # keep section list ordered
        self.sections.sort(key=lambda section: section.base)

    def __repr__(self):
        # exactly one argument per placeholder: class name, name, section names, base
        return "{}(name={},sections={},base={})".format(
            self.__class__.__name__, self.name,
            [s.name for s in self.sections], self.base
        )

    @property
    def entrypoint(self):
        """
        The address execution should start at.

        This is the label ``_start`` if present, otherwise the label ``main``, otherwise the base
        of the first executable section whose base name is ``.text``.

        :return: the entrypoint, or None if none of the above exists
        """
        labels = self.context.labels
        for label in ('_start', 'main'):
            if label in labels:
                return labels.get(label)

        for sec in self.sections:
            if get_section_base_name(sec.name) == '.text' and sec.flags.executable:
                return sec.base

        return None

    def loaded_trigger(self, at_addr: T_AbsoluteAddress):
        """
        This trigger is called when the binary is loaded and its final address in memory is determined

        This will do a small sanity check to prevent programs loading twice, or at addresses they don't
        expect to be loaded.

        Then it will finalize all relative symbols defined in it to point to the correct addresses.

        :param at_addr: the address where the program will be located
        :raises RuntimeError: if the program is marked as loaded and at_addr differs from its base
        """
        if self.is_loaded:
            if at_addr != self.base:
                raise RuntimeError("Program loaded twice at different addresses! This will probably break things!")
            return

        if self.base is not None and self.base != at_addr:
            print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, '
                            'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE)

        # check if we are relocating
        if self.base != at_addr:
            # relocatable programs (base None) store offsets, so the target address is the offset
            offset = at_addr if self.base is None else at_addr - self.base

            # move all sections by the offset
            for sec in self.sections:
                sec.base += offset

            # move all relative symbols by the offset
            for name in self.relative_labels:
                self.context.labels[name] += offset

            self.base = at_addr
            self.context.base_address = at_addr
|
@ -0,0 +1,58 @@
|
|||||||
|
import os
from abc import abstractmethod, ABC
from typing import Union, Iterator, List, Tuple

from . import T_ParserOpts, Program
|
||||||
|
|
||||||
|
|
||||||
|
class ProgramLoader(ABC):
    """
    A program loader is always specific to a given source file. It is a place to store all state
    concerning the parsing and loading of that specific source file, including options.
    """

    def __init__(self, source_path: str, options: T_ParserOpts):
        """
        :param source_path: the path of the source file this loader is responsible for
        :param options: the parser options for this file
        """
        self.source_path = source_path
        self.options = options
        # last path component of the source path
        self.filename = os.path.basename(self.source_path)

    @classmethod
    @abstractmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Return confidence that the file located at source_path
        should be parsed and loaded by this loader

        :param source_path: the path of the source file
        :return: the confidence that this file belongs to this parser
        """
        pass

    @classmethod
    @abstractmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
        """
        parse command line args into an options dictionary

        :param argv: the command line args list
        :return: all remaining command line args and the parser options object
        """
        pass

    @classmethod
    def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader':
        """
        Instantiate a loader for the given source file with the required arguments

        :param source_path: the path to the source file
        :param options: the parsed options (guaranteed to come from this classes get_options method.
        :return: An instance of a ProgramLoader for the specified source
        """
        return cls(source_path, options)

    @abstractmethod
    def parse(self) -> Union[Program, Iterator[Program]]:
        """
        Parse the source file of this loader.

        :return: the parsed program, or an iterator over programs
        """
        pass
|
@ -0,0 +1,26 @@
|
|||||||
|
from typing import Union, Tuple
|
||||||
|
|
||||||
|
from . import Instruction, T_RelativeAddress, InstructionContext
|
||||||
|
from ..helpers import parse_numeric_argument
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleInstruction(Instruction):
    """
    A basic Instruction implementation that keeps its arguments as strings and resolves
    immediates through an InstructionContext.
    """

    def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]],
                 context: InstructionContext, addr: T_RelativeAddress):
        """
        :param name: the instruction name
        :param args: up to three string arguments
        :param context: the context used to resolve labels
        :param addr: the address of the instruction, passed to the context when resolving labels
        """
        self.name = name
        self.args = args
        self.addr = addr
        self.context = context

    def get_imm(self, num: int) -> int:
        """
        Get argument ``num`` as an immediate.

        The argument is resolved as a label first; if the context does not know it, it is parsed
        as a numeric argument.

        :param num: index of the argument
        :return: the label address or the parsed number
        """
        raw = self.args[num]
        resolved = self.context.resolve_label(raw, self.addr)
        if resolved is not None:
            return resolved
        return parse_numeric_argument(raw)

    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        Get an immediate/register pair: the immediate is argument ``num + 1``, the register is
        argument ``num``.

        :param num: index of the register argument
        :return: (immediate, register)
        """
        immediate = self.get_imm(num + 1)
        register = self.get_reg(num)
        return immediate, register

    def get_reg(self, num: int) -> str:
        """
        Get argument ``num`` unchanged, as a register name.

        :param num: index of the argument
        """
        return self.args[num]
|
||||||
|
|
Loading…
Reference in New Issue