[restructured] moved all simple type definitions into riscemu.types
parent
254410e9cc
commit
bc26ed3a02
@ -1,82 +0,0 @@
|
||||
"""
|
||||
This file contains a base implementation of Instruction, and MemorySection.
|
||||
|
||||
This aims to be a simple base, usable for everyone who needs the basic functionality, but doesn't
|
||||
want to set up their own subtypes of Instruction and MemorySection
|
||||
"""
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
from .exceptions import MemoryAccessException
|
||||
from .helpers import parse_numeric_argument
|
||||
from .types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \
|
||||
T_AbsoluteAddress, Program
|
||||
|
||||
|
||||
class SimpleInstruction(Instruction):
    """
    Minimal concrete Instruction which stores its arguments as plain strings and
    interprets them lazily when one of the ``get_*`` accessors is called.
    """

    def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]],
                 context: InstructionContext, addr: T_RelativeAddress):
        """
        :param name: the instruction name
        :param args: zero to three textual arguments
        :param context: the context used to resolve label arguments
        :param addr: address of this instruction, passed on to label resolution
        """
        self.name = name
        self.args = args
        self.addr = addr
        self.context = context

    def get_imm(self, num: int) -> int:
        """
        Return argument ``num`` as an immediate.

        The argument is first looked up as a label through the context; only when that
        yields ``None`` is it handed to ``parse_numeric_argument``.
        """
        target = self.context.resolve_label(self.args[num], self.addr)
        if target is not None:
            return target
        return parse_numeric_argument(self.args[num])

    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        Return the pair (immediate taken from argument ``num + 1``, register taken from argument ``num``).
        """
        immediate = self.get_imm(num + 1)
        register = self.get_reg(num)
        return immediate, register

    def get_reg(self, num: int) -> str:
        """
        Return argument ``num`` unchanged, as a register name.
        """
        return self.args[num]
|
||||
|
||||
|
||||
class InstructionMemorySection(MemorySection):
    """
    A read-only, executable memory section which holds a list of Instruction objects.

    Every instruction occupies four bytes of address space; raw byte access is refused.
    """

    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0):
        """
        :param instructions: the instructions stored in this section, in address order
        :param name: the section name
        :param context: the instruction context belonging to this section
        :param owner: name of the owner of this section
        :param base: the base address of the section
        """
        self.instructions = instructions
        # each instruction takes up four bytes
        self.size = 4 * len(instructions)
        self.flags = MemoryFlags(read_only=True, executable=True)
        self.name = name
        self.owner = owner
        self.base = base
        self.context = context

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Always fails: instruction sections do not expose raw bytes.

        :raises MemoryAccessException: on every call
        """
        address = self.base + offset
        raise MemoryAccessException("Cannot read raw bytes from instruction section", address, size, 'read')

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Always fails: instruction sections cannot be written as raw bytes.

        :raises MemoryAccessException: on every call
        """
        address = self.base + offset
        raise MemoryAccessException("Cannot write raw bytes to instruction section", address, size, 'write')

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Fetch the instruction stored at the given section-relative offset.

        :param offset: offset into the section, must be a multiple of four
        :raises MemoryAccessException: if the offset is not four byte aligned
        """
        if offset % 4 == 0:
            return self.instructions[offset // 4]
        raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')
|
||||
|
||||
|
||||
class BinaryDataMemorySection(MemorySection):
    """
    A memory section backed by a bytearray. It cannot be executed.
    """

    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0, flags: MemoryFlags = None):
        """
        :param data: the backing storage; its length determines the section size
        :param name: the section name
        :param context: the instruction context belonging to this section
        :param owner: name of the owner of this section
        :param base: the base address of the section
        :param flags: section flags, defaults to ``MemoryFlags(False, False)`` when omitted
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(data)
        self.flags = flags if flags is not None else MemoryFlags(False, False)
        self.data = data
        self.owner = owner

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Return ``size`` bytes starting at the section-relative ``offset``.

        :raises MemoryAccessException: if the requested range is not fully inside the section
        """
        # a negative offset would otherwise be interpreted by the slice as "from the end"
        # and silently return the wrong (or no) data
        if offset < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read')
        return self.data[offset:offset + size]

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Overwrite ``size`` bytes starting at the section-relative ``offset`` with the first
        ``size`` bytes of ``data``.

        :raises MemoryAccessException: if the target range is not fully inside the section, or
                                       if ``data`` holds fewer than ``size`` bytes
        """
        # a negative offset would turn the slice assignment below into an insertion,
        # changing the length of the backing bytearray
        if offset < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write')
        payload = data[0:size]
        if len(payload) != size:
            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
        self.data[offset:offset + size] = payload

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Always fails: binary data sections hold no instructions.

        :raises MemoryAccessException: on every call
        """
        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
                                    offset, 4, 'instruction fetch')
|
@ -1,656 +0,0 @@
|
||||
"""
|
||||
RiscEmu (c) 2021 Anton Lydike
|
||||
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
This file contains abstract base classes and types, bundling only the absolute basic functionality
|
||||
|
||||
See base.py for some basic implementations of these classes
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import typing
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import defaultdict
|
||||
from ctypes import c_uint32, c_int32
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional, Tuple, Set, Union, Iterator, Callable, Type
|
||||
|
||||
from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE, FMT_RED, FMT_BOLD
|
||||
from .config import RunConfig
|
||||
from .exceptions import ParseException
|
||||
from .helpers import format_bytes, get_section_base_name
|
||||
from .registers import Registers
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from .MMU import MMU
|
||||
from .instructions.instruction_set import InstructionSet
|
||||
|
||||
# define some base type aliases so we can keep track of absolute and relative addresses
T_RelativeAddress = int
T_AbsoluteAddress = int

# parser options are just dictionaries with arbitrary values
# (typing.Any, not the builtin function any(), is the "arbitrary value" type)
T_ParserOpts = Dict[str, typing.Any]

# matches references to numbered labels: one or more digits followed by a direction, f or b
NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
|
||||
|
||||
|
||||
class Int32:
    """
    A signed 32 bit integer with wrap-around semantics, stored in a ``ctypes.c_int32``.

    Binary operators accept either another ``Int32`` (or subclass) or a plain ``int`` and
    return a new instance of this object's class. Results which do not fit into 32 bits
    wrap around, because they are stored in the ctypes backing type.
    """
    _type = c_int32
    __slots__ = ('_val',)

    def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0):
        """
        :param val: the initial value. bytes and bytearrays are decoded as little endian
                    signed integers, every other accepted type is converted through its
                    integer value.
        :raises RuntimeError: if val has an unsupported type
        """
        if isinstance(val, (bytes, bytearray)):
            self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True))
        elif isinstance(val, self.__class__._type):
            # already the right ctypes type, store it without copying
            self._val = val
        elif isinstance(val, (c_uint32, c_int32, Int32)):
            self._val = self.__class__._type(val.value)
        elif isinstance(val, int):
            self._val = self.__class__._type(val)
        else:
            raise RuntimeError(
                "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val)
            )

    @staticmethod
    def _unwrap(other):
        """
        Return the plain int behind other if it is an Int32, otherwise other unchanged.
        """
        return other.value if isinstance(other, Int32) else other

    def __add__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value + self._unwrap(other))

    def __sub__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value - self._unwrap(other))

    def __mul__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value * self._unwrap(other))

    def __truediv__(self, other):
        # there are no fractional values, true division is integer division
        return self // other

    def __floordiv__(self, other):
        return self.__class__(self.value // self._unwrap(other))

    def __mod__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value % self._unwrap(other))

    def __and__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value & self._unwrap(other))

    def __or__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value | self._unwrap(other))

    def __xor__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value ^ self._unwrap(other))

    def __lshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value << self._unwrap(other))

    def __rshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value >> self._unwrap(other))

    def __eq__(self, other: Union['Int32', int]):
        return self.value == self._unwrap(other)

    def __neg__(self):
        return self.__class__(-self._val.value)

    def __abs__(self):
        return self.__class__(abs(self.value))

    def __bytes__(self):
        # bytes() requires __bytes__ to return an actual bytes object, to_bytes() yields a bytearray
        return bytes(self.to_bytes(4))

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, self.value)

    def __str__(self):
        return str(self.value)

    def __format__(self, format_spec):
        return self.value.__format__(format_spec)

    def __hash__(self):
        return hash(self.value)

    def __gt__(self, other):
        return self.value > self._unwrap(other)

    def __lt__(self, other):
        return self.value < self._unwrap(other)

    def __le__(self, other):
        return self.value <= self._unwrap(other)

    def __ge__(self, other):
        return self.value >= self._unwrap(other)

    def __bool__(self):
        return bool(self.value)

    def __cmp__(self, other):
        """
        Three way comparison: negative, zero or positive if this value is smaller than,
        equal to or greater than other.

        Python 3 never calls this itself and int has no __cmp__ to delegate to, so the
        result is computed from the ordinary comparisons.
        """
        other = self._unwrap(other)
        return (self.value > other) - (self.value < other)

    # right handed binary operators

    def __radd__(self, other):
        return self + other

    def __rsub__(self, other):
        return self.__class__(other) - self

    def __rmul__(self, other):
        return self * other

    def __rtruediv__(self, other):
        return self.__class__(other) // self

    def __rfloordiv__(self, other):
        return self.__class__(other) // self

    def __rmod__(self, other):
        return self.__class__(other) % self

    def __rand__(self, other):
        return self.__class__(other) & self

    def __ror__(self, other):
        return self.__class__(other) | self

    def __rxor__(self, other):
        return self.__class__(other) ^ self

    @property
    def value(self):
        """
        The value as a plain python int
        """
        return self._val.value

    def unsigned(self) -> 'UInt32':
        """
        Convert to an unsigned 32 bit value
        """
        return UInt32(self)

    def to_bytes(self, bytes: int = 4) -> bytearray:
        """
        Return the unsigned value as a little endian bytearray.

        :param bytes: number of bytes to produce
        """
        return bytearray(self.unsigned_value.to_bytes(bytes, 'little'))

    def signed(self) -> 'Int32':
        """
        Convert to a signed 32 bit value, returns self if this already is exactly an Int32
        """
        if self.__class__ == Int32:
            return self
        return Int32(self)

    @property
    def unsigned_value(self):
        """
        The value reinterpreted as an unsigned 32 bit python int
        """
        return c_uint32(self.value).value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Shift right filling with zeros: the shift is applied to the value taken modulo 2**32.
        """
        ammount = self._unwrap(ammount)
        return self.__class__((self.value % 0x100000000) >> ammount)

    def __int__(self):
        return self.value

    def __hex__(self):
        return hex(self.value)
|
||||
|
||||
|
||||
class UInt32(Int32):
    """
    Unsigned counterpart of Int32, stored in a ``ctypes.c_uint32``.
    """
    _type = c_uint32

    def unsigned(self) -> 'UInt32':
        """
        This value is already unsigned, so the object itself is returned
        """
        return self

    @property
    def unsigned_value(self):
        """
        The stored value, which is unsigned already
        """
        return self.value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Shift right by the given amount using the ordinary right shift operator
        """
        return self >> ammount
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MemoryFlags:
    """
    Immutable pair of access flags of a memory section.
    """
    read_only: bool
    executable: bool

    def __repr__(self):
        """
        Render the flags as three characters: a constant ``r``, then ``w`` or ``-``
        for writable or read-only, then ``x`` or ``-`` for executable or not.
        """
        write_char = 'w'
        if self.read_only:
            write_char = '-'
        exec_char = '-'
        if self.executable:
            exec_char = 'x'
        return "r{}{}".format(write_char, exec_char)
|
||||
|
||||
|
||||
class InstructionContext:
    """
    Holds the symbol information needed to resolve the labels used by instructions.
    """

    # The address where the instruction block is placed
    base_address: T_AbsoluteAddress

    # This dictionary maps all labels to their relative position of the instruction block
    labels: Dict[str, T_RelativeAddress]

    # This dictionary maps numbered labels (which can occur multiple times) to a list of
    # (block-relative) addresses where the label was placed
    numbered_labels: Dict[str, List[T_RelativeAddress]]

    # A reference to the MMU's global symbol dictionary for access to global symbols
    global_symbol_dict: Dict[str, T_AbsoluteAddress]

    def __init__(self):
        self.labels = dict()
        self.numbered_labels = defaultdict(list)
        self.base_address = 0
        self.global_symbol_dict = dict()

    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]:
        """
        Look up the address a symbol refers to.

        Symbols matching NUMBER_SYMBOL_PATTERN are numbered label references: the trailing
        character selects the search direction, ``b`` picks the highest address below
        address_at, anything else picks the lowest address above it. The base address is
        added to the result. All other symbols are looked up in the local labels first and
        in the global symbols otherwise.

        :param symbol: the symbol to resolve
        :param address_at: the address from which a numbered label is referenced
        :return: the resolved address, or None if the symbol is unknown
        :raises ParseException: if a numbered label is given without address_at
        """
        if not NUMBER_SYMBOL_PATTERN.match(symbol):
            # local symbols take precedence, globals are the fallback
            if symbol in self.labels:
                return self.labels.get(symbol, None)
            return self.global_symbol_dict.get(symbol, None)

        if address_at is None:
            raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))

        label_number, direction = symbol[:-1], symbol[-1]
        candidates = self.numbered_labels.get(label_number, [])
        if direction == 'b':
            before = [addr + self.base_address for addr in candidates if addr < address_at]
            return max(before, default=None)
        after = [addr + self.base_address for addr in candidates if addr > address_at]
        return min(after, default=None)
|
||||
|
||||
|
||||
class Instruction(ABC):
    """
    Abstract interface of a single instruction: a name plus a tuple of arguments.
    """
    name: str
    args: tuple

    @abstractmethod
    def get_imm(self, num: int) -> int:
        """
        parse and get immediate argument
        """

    @abstractmethod
    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        parse and get an argument imm(reg)
        """

    @abstractmethod
    def get_reg(self, num: int) -> str:
        """
        parse and get an register argument
        """

    def __repr__(self):
        """
        The instruction name followed by its comma separated arguments
        """
        joined_args = ", ".join(self.args)
        return "{} {}".format(self.name, joined_args)
|
||||
|
||||
|
||||
@dataclass
class MemorySection(ABC):
    """
    Abstract base of all memory sections: a named, owned address range with access flags.
    """
    name: str
    flags: MemoryFlags
    size: int
    base: T_AbsoluteAddress
    owner: str
    context: InstructionContext

    @property
    def end(self):
        """
        The first address behind this section
        """
        return self.size + self.base

    @abstractmethod
    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Read size bytes starting at the section-relative offset
        """

    @abstractmethod
    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Write size bytes of data starting at the section-relative offset
        """

    @abstractmethod
    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Fetch the instruction at the section-relative offset
        """

    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
        """
        Print a part of this section to stdout.

        Executable sections are printed one instruction per line, all others as rows of
        formatted bytes. When no end is given, a window of about rows lines around start
        is printed and start is highlighted.

        :param start: section-relative offset to start at (or to center on, if end is None)
        :param end: section-relative offset to stop at
        :param fmt: byte format, handed to format_bytes
        :param bytes_per_row: bytes per printed row, forced to 4 for executable sections
        :param rows: number of rows to show when end is None
        :param group: byte group size, handed to format_bytes
        """
        is_executable = self.flags.executable
        if is_executable:
            bytes_per_row = 4

        highlight = None
        if end is None:
            # no explicit range: show half a window before and after start
            half_window = bytes_per_row * (rows // 2)
            end = min(start + half_window, self.size - 1)
            highlight = start
            start = max(0, start - half_window)

        if is_executable:
            print(FMT_MEM + "{}, viewing {} instructions:".format(
                self, (end - start) // 4
            ) + FMT_NONE)

            for addr in range(start, end, 4):
                if addr == highlight:
                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
                print("0x{:04x}: {}{}".format(
                    self.base + addr, self.read_ins(addr), FMT_NONE
                ))
            return

        print(FMT_MEM + "{}, viewing {} bytes:".format(
            self, (end - start)
        ) + FMT_NONE)

        # full rows are printed up to the last multiple of bytes_per_row before end
        aligned_end = end - (end % bytes_per_row)

        for addr in range(start, aligned_end, bytes_per_row):
            hi_ind = -1 if highlight is None else (highlight - addr) // group
            print("0x{:04x}: {}{}".format(
                self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE
            ))

        if aligned_end != end:
            # print the remaining bytes as a shorter last row
            hi_ind = -1 if highlight is None else (highlight - aligned_end) // group
            print("0x{:04x}: {}{}".format(
                self.base + aligned_end, format_bytes(
                    self.read(aligned_end, end % bytes_per_row), fmt, group, hi_ind
                ), FMT_NONE
            ))

    def dump_all(self, *args, **kwargs):
        """
        Dump the whole section, further arguments are passed on to dump
        """
        self.dump(0, self.size, *args, **kwargs)

    def __repr__(self):
        """
        One line summary: class, name, base address, size, flags and owner
        """
        summary = (self.__class__.__name__, self.name, self.base, self.size, self.flags, self.owner)
        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(*summary)
|
||||
|
||||
|
||||
class Program:
    """
    This represents a collection of sections which together form an executable program

    When you want to create a program which can be located anywhere in memory, set base to None,
    this signals the other components, that this is relocatable. Set the base of each section to
    the offset in the program, and everything will be taken care of for you.

    """
    name: str
    context: InstructionContext
    global_labels: Set[str]
    relative_labels: Set[str]
    sections: List[MemorySection]
    base: Optional[T_AbsoluteAddress]
    is_loaded: bool

    @property
    def size(self):
        """
        The distance from the program base (or from zero for relocatable programs) to the
        end of the last section, zero if there are no sections.
        """
        if not self.sections:
            return 0
        # sections are kept sorted by base, so the last one reaches furthest
        last = self.sections[-1]
        if self.base is None:
            return last.base + last.size
        return (last.base - self.base) + last.size

    def __init__(self, name: str, base: Optional[int] = None):
        """
        :param name: the program name
        :param base: the address the program expects to be loaded at, None if it is relocatable
        """
        self.name = name
        self.context = InstructionContext()
        self.sections = []
        self.global_labels = set()
        self.relative_labels = set()
        self.base = base
        self.is_loaded = False

    def add_section(self, sec: MemorySection):
        """
        Add a section to the program, keeping the section list ordered by base address.

        A warning is printed if the section starts before the program base.
        """
        # print a warning when a section is located before the programs base
        if self.base is not None:
            if sec.base < self.base:
                print(
                    FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format(
                        sec, self.name, self.base
                    ) + FMT_NONE)

        self.sections.append(sec)
        # keep section list ordered
        self.sections.sort(key=lambda section: section.base)

    def __repr__(self):
        # every argument needs its own placeholder, str.format silently drops surplus arguments
        return "{}(name={},globals={},sections={},base={})".format(
            self.__class__.__name__, self.name, self.global_labels,
            [s.name for s in self.sections], self.base
        )

    @property
    def entrypoint(self):
        """
        The address to start execution at: the ``_start`` label, else the ``main`` label,
        else the base of the first executable section whose base name is ``.text``.
        None if none of these exist.
        """
        if '_start' in self.context.labels:
            return self.context.labels.get('_start')
        if 'main' in self.context.labels:
            return self.context.labels.get('main')
        for sec in self.sections:
            if get_section_base_name(sec.name) == '.text' and sec.flags.executable:
                return sec.base

    def loaded_trigger(self, at_addr: T_AbsoluteAddress):
        """
        This trigger is called when the binary is loaded and its final address in memory is determined

        This will do a small sanity check to prevent programs loading twice, or at addresses they don't
        expect to be loaded.

        Then it will finalize all relative symbols defined in it to point to the correct addresses.

        :param at_addr: the address where the program will be located
        :raises RuntimeError: if the program is marked as loaded and at_addr differs from its base
        """
        # NOTE(review): is_loaded is only read here, never set - presumably the loading
        # component sets it, confirm against the caller
        if self.is_loaded:
            if at_addr != self.base:
                raise RuntimeError("Program loaded twice at different addresses! This will probably break things!")
            return

        if self.base is not None and self.base != at_addr:
            print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, '
                            'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE)

        # check if we are relocating
        if self.base != at_addr:
            # relocatable programs (base is None) have section bases relative to zero
            offset = at_addr if self.base is None else at_addr - self.base

            # move all sections by the offset
            for sec in self.sections:
                sec.base += offset

            # move all relative symbols by the offset
            for name in self.relative_labels:
                self.context.labels[name] += offset

            self.base = at_addr
            self.context.base_address = at_addr
|
||||
|
||||
|
||||
class ProgramLoader(ABC):
    """
    A program loader is always specific to a given source file. It is a place to store all state
    concerning the parsing and loading of that specific source file, including options.
    """

    def __init__(self, source_path: str, options: T_ParserOpts):
        """
        :param source_path: the path of the source file
        :param options: the parser options for this file
        """
        self.source_path = source_path
        self.options = options
        # the last path component, i.e. the file name without directories
        self.filename = os.path.split(self.source_path)[-1]

    @classmethod
    @abstractmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Return confidence that the file located at source_path
        should be parsed and loaded by this loader
        :param source_path: the path of the source file
        :return: the confidence that this file belongs to this parser
        """
        pass

    @classmethod
    @abstractmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
        """
        parse command line args into an options dictionary

        :param argv: the command line args list
        :return: all remaining command line args and the parser options object
        """
        pass

    @classmethod
    def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader':
        """
        Instantiate a loader for the given source file with the required arguments

        :param source_path: the path to the source file
        :param options: the parsed options (guaranteed to come from this classes get_options method).
        :return: An instance of a ProgramLoader for the specified source
        """
        return cls(source_path, options)

    @abstractmethod
    def parse(self) -> Union[Program, Iterator[Program]]:
        """
        Parse the source file of this loader.

        :return: a single Program, or an iterator over several Programs
        """
        pass
|
||||
|
||||
|
||||
class CPU(ABC):
    """
    Abstract base of all CPUs: holds registers, the MMU, the program counter and the
    instruction table assembled from a list of instruction sets.
    """
    # static cpu configuration
    INS_XLEN: int = 4

    # housekeeping variables
    regs: Registers
    mmu: 'MMU'
    pc: T_AbsoluteAddress
    cycle: int
    halted: bool

    # debugging context
    debugger_active: bool

    # instruction information
    instructions: Dict[str, Callable[[Instruction], None]]
    instruction_sets: Set['InstructionSet']

    # configuration
    conf: RunConfig

    def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig):
        """
        :param mmu: the memory management unit this cpu uses
        :param instruction_sets: instruction set classes, each is instantiated with this cpu
        :param conf: the run configuration
        """
        self.mmu = mmu
        self.regs = Registers()
        self.conf = conf

        self.instruction_sets = set()
        self.instructions = dict()

        # instantiate every set and merge the instructions it loads into one table
        for set_class in instruction_sets:
            instance = set_class(self)
            self.instructions.update(instance.load())
            self.instruction_sets.add(instance)

        self.halted = False
        self.cycle = 0
        self.pc = 0
        self.debugger_active = False

    def run_instruction(self, ins: Instruction):
        """
        Execute a single instruction

        :param ins: The instruction to execute
        :raises RuntimeError: if no implementation is registered under the instructions name
        """
        if ins.name not in self.instructions:
            # this should never be reached, as unknown instructions are imparseable
            raise RuntimeError("Unknown instruction: {}".format(ins))
        self.instructions[ins.name](ins)

    def load_program(self, program: Program):
        """
        Hand the program to the MMU for loading
        """
        self.mmu.load_program(program)

    def __repr__(self):
        """
        Returns a representation of the CPU and some of its state.
        """
        set_names = " ".join(s.name for s in self.instruction_sets)
        return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format(
            self.__class__.__name__, self.pc, self.cycle, self.halted, set_names
        )

    @abstractmethod
    def step(self, verbose=False):
        pass

    @abstractmethod
    def run(self, verbose=False):
        pass

    def launch(self, program: Program, verbose: bool = False):
        """
        Set the program counter to the entrypoint of a loaded program and run it.

        Prints an error and does nothing if the program is not known to the MMU.
        """
        if program in self.mmu.programs:
            self.pc = program.entrypoint
            self.run(verbose)
        else:
            print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE)

    @classmethod
    @abstractmethod
    def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
        pass

    def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]:
        """
        Return the loader which reports the highest can_parse confidence for the file
        """
        def confidence(loader):
            return loader.can_parse(file_name)

        return max(self.get_loaders(), key=confidence)

    @property
    def sections(self):
        """
        The sections known to the MMU
        """
        return self.mmu.sections

    @property
    def programs(self):
        """
        The programs known to the MMU
        """
        return self.mmu.programs
|
@ -0,0 +1,26 @@
|
||||
import re
from typing import Any, Dict
|
||||
|
||||
# define some base type aliases so we can keep track of absolute and relative addresses
T_RelativeAddress = int
T_AbsoluteAddress = int

# parser options are just dictionaries with arbitrary values
# (typing.Any, not the builtin function any(), is the "arbitrary value" type)
T_ParserOpts = Dict[str, Any]

# matches references to numbered labels: one or more digits followed by a direction, f or b
NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
|
||||
|
||||
from .flags import MemoryFlags
|
||||
from .int32 import UInt32, Int32
|
||||
from .instruction import Instruction
|
||||
from .instruction_context import InstructionContext
|
||||
from .memory_section import MemorySection
|
||||
from .program import Program
|
||||
from .program_loader import ProgramLoader
|
||||
from .cpu import CPU
|
||||
from .simple_instruction import SimpleInstruction
|
||||
from .instruction_memory_section import InstructionMemorySection
|
||||
from .binary_data_memory_section import BinaryDataMemorySection
|
||||
|
||||
|
||||
|
@ -0,0 +1,29 @@
|
||||
from . import MemorySection, InstructionContext, MemoryFlags, T_RelativeAddress, Instruction
|
||||
from ..exceptions import MemoryAccessException
|
||||
|
||||
|
||||
class BinaryDataMemorySection(MemorySection):
    """
    A memory section backed by a bytearray. It cannot be executed.
    """

    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0, flags: MemoryFlags = None):
        """
        :param data: the backing storage; its length determines the section size
        :param name: the section name
        :param context: the instruction context belonging to this section
        :param owner: name of the owner of this section
        :param base: the base address of the section
        :param flags: section flags, defaults to ``MemoryFlags(False, False)`` when omitted
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(data)
        self.flags = flags if flags is not None else MemoryFlags(False, False)
        self.data = data
        self.owner = owner

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Return ``size`` bytes starting at the section-relative ``offset``.

        :raises MemoryAccessException: if the requested range is not fully inside the section
        """
        # a negative offset would otherwise be interpreted by the slice as "from the end"
        # and silently return the wrong (or no) data
        if offset < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read')
        return self.data[offset:offset + size]

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Overwrite ``size`` bytes starting at the section-relative ``offset`` with the first
        ``size`` bytes of ``data``.

        :raises MemoryAccessException: if the target range is not fully inside the section, or
                                       if ``data`` holds fewer than ``size`` bytes
        """
        # a negative offset would turn the slice assignment below into an insertion,
        # changing the length of the backing bytearray
        if offset < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write')
        payload = data[0:size]
        if len(payload) != size:
            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
        self.data[offset:offset + size] = payload

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Always fails: binary data sections hold no instructions.

        :raises MemoryAccessException: on every call
        """
        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
                                    offset, 4, 'instruction fetch')
|
@ -0,0 +1,107 @@
|
||||
import typing
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Type, Callable, Set, Dict
|
||||
|
||||
from ..registers import Registers
|
||||
from ..config import RunConfig
|
||||
from ..colors import FMT_RED, FMT_NONE
|
||||
from . import T_AbsoluteAddress, Instruction, Program, ProgramLoader
|
||||
|
||||
|
||||
class CPU(ABC):
    """
    Abstract base of all CPUs: holds registers, the MMU, the program counter and the
    instruction table assembled from a list of instruction sets.
    """
    # static cpu configuration
    INS_XLEN: int = 4

    # housekeeping variables
    regs: Registers
    mmu: 'MMU'
    pc: T_AbsoluteAddress
    cycle: int
    halted: bool

    # debugging context
    debugger_active: bool

    # instruction information
    instructions: Dict[str, Callable[[Instruction], None]]
    instruction_sets: Set['InstructionSet']

    # configuration
    conf: RunConfig

    def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig):
        """
        :param mmu: the memory management unit this cpu uses
        :param instruction_sets: instruction set classes, each is instantiated with this cpu
        :param conf: the run configuration
        """
        self.mmu = mmu
        self.regs = Registers()
        self.conf = conf

        self.instruction_sets = set()
        self.instructions = dict()

        # instantiate every set and merge the instructions it loads into one table
        for set_class in instruction_sets:
            instance = set_class(self)
            self.instructions.update(instance.load())
            self.instruction_sets.add(instance)

        self.halted = False
        self.cycle = 0
        self.pc = 0
        self.debugger_active = False

    def run_instruction(self, ins: Instruction):
        """
        Execute a single instruction

        :param ins: The instruction to execute
        :raises RuntimeError: if no implementation is registered under the instructions name
        """
        if ins.name not in self.instructions:
            # this should never be reached, as unknown instructions are imparseable
            raise RuntimeError("Unknown instruction: {}".format(ins))
        self.instructions[ins.name](ins)

    def load_program(self, program: Program):
        """
        Hand the program to the MMU for loading
        """
        self.mmu.load_program(program)

    def __repr__(self):
        """
        Returns a representation of the CPU and some of its state.
        """
        set_names = " ".join(s.name for s in self.instruction_sets)
        return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format(
            self.__class__.__name__, self.pc, self.cycle, self.halted, set_names
        )

    @abstractmethod
    def step(self, verbose=False):
        pass

    @abstractmethod
    def run(self, verbose=False):
        pass

    def launch(self, program: Program, verbose: bool = False):
        """
        Set the program counter to the entrypoint of a loaded program and run it.

        Prints an error and does nothing if the program is not known to the MMU.
        """
        if program in self.mmu.programs:
            self.pc = program.entrypoint
            self.run(verbose)
        else:
            print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE)

    @classmethod
    @abstractmethod
    def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
        pass

    def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]:
        """
        Return the loader which reports the highest can_parse confidence for the file
        """
        def confidence(loader):
            return loader.can_parse(file_name)

        return max(self.get_loaders(), key=confidence)

    @property
    def sections(self):
        """
        The sections known to the MMU
        """
        return self.mmu.sections

    @property
    def programs(self):
        """
        The programs known to the MMU
        """
        return self.mmu.programs
|
@ -0,0 +1,13 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MemoryFlags:
    """
    Immutable pair of access flags of a memory section.
    """
    read_only: bool
    executable: bool

    def __repr__(self):
        """
        Render the flags as three characters: a constant ``r``, then ``w`` or ``-``
        for writable or read-only, then ``x`` or ``-`` for executable or not.
        """
        write_char = 'w'
        if self.read_only:
            write_char = '-'
        exec_char = '-'
        if self.executable:
            exec_char = 'x'
        return "r{}{}".format(write_char, exec_char)
|
@ -0,0 +1,31 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Tuple
|
||||
|
||||
|
||||
class Instruction(ABC):
    """
    Abstract interface of a single instruction: a name plus a tuple of arguments.
    """
    name: str
    args: tuple

    @abstractmethod
    def get_imm(self, num: int) -> int:
        """
        parse and get immediate argument
        """

    @abstractmethod
    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        parse and get an argument imm(reg)
        """

    @abstractmethod
    def get_reg(self, num: int) -> str:
        """
        parse and get an register argument
        """

    def __repr__(self):
        """
        The instruction name followed by its comma separated arguments
        """
        joined_args = ", ".join(self.args)
        return "{} {}".format(self.name, joined_args)
|
@ -0,0 +1,53 @@
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from ..exceptions import ParseException
|
||||
from ..types import T_AbsoluteAddress, T_RelativeAddress, NUMBER_SYMBOL_PATTERN
|
||||
|
||||
|
||||
class InstructionContext:
    base_address: T_AbsoluteAddress
    """
    The address where the instruction block is placed
    """

    labels: Dict[str, T_RelativeAddress]
    """
    This dictionary maps all labels to their relative position of the instruction block
    """

    numbered_labels: Dict[str, List[T_RelativeAddress]]
    """
    This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where
    the label was placed
    """

    global_symbol_dict: Dict[str, T_AbsoluteAddress]
    """
    A reference to the MMU's global symbol dictionary for access to global symbols
    """

    def __init__(self):
        """
        Create an empty context: no labels, no numbered labels, no globals, base address 0.
        """
        self.base_address = 0
        self.labels = {}
        self.global_symbol_dict = {}
        self.numbered_labels = defaultdict(list)

    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]:
        """
        Look up the address a symbol refers to.

        Symbols matching NUMBER_SYMBOL_PATTERN are treated as numbered labels: the last character
        is the direction, the rest is the label name. For direction 'b' the largest placement
        below address_at is used, for anything else the smallest placement above address_at;
        base_address is added to the result. All other symbols are looked up in the local labels
        first and in the global symbol dictionary otherwise.

        :param symbol: the symbol to resolve
        :param address_at: the position the symbol is referenced from, needed for numbered labels
        :return: the resolved address, or None if nothing was found
        :raises ParseException: if a numbered label is given without address_at
        """
        if not NUMBER_SYMBOL_PATTERN.match(symbol):
            # a local definition wins, everything else is looked up in the globals
            if symbol in self.labels:
                return self.labels.get(symbol, None)
            return self.global_symbol_dict.get(symbol, None)

        if address_at is None:
            raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))

        label_name, direction = symbol[:-1], symbol[-1]
        # .get() is used on purpose so the defaultdict does not grow by lookups
        placements = self.numbered_labels.get(label_name, [])

        if direction == 'b':
            before = [pos + self.base_address for pos in placements if pos < address_at]
            return max(before, default=None)

        after = [pos + self.base_address for pos in placements if pos > address_at]
        return min(after, default=None)
|
||||
|
@ -0,0 +1,27 @@
|
||||
from typing import List
|
||||
|
||||
from . import MemorySection, Instruction, InstructionContext, MemoryFlags, T_RelativeAddress
|
||||
from .. import MemoryAccessException
|
||||
|
||||
|
||||
class InstructionMemorySection(MemorySection):
    """
    A memory section holding a list of Instruction objects, each occupying four bytes.
    Raw byte access is refused, instructions are fetched through read_ins.
    """

    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str, base: int = 0):
        """
        :param instructions: the instructions stored in this section
        :param name: name of the section
        :param context: the instruction context of this section
        :param owner: owner of the section
        :param base: base address of the section
        """
        self.instructions = instructions
        self.size = 4 * len(instructions)
        self.flags = MemoryFlags(read_only=True, executable=True)
        self.name = name
        self.owner = owner
        self.context = context
        self.base = base

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Always raises a MemoryAccessException, this section cannot be read as bytes.
        """
        address = self.base + offset
        raise MemoryAccessException("Cannot read raw bytes from instruction section", address, size, 'read')

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Always raises a MemoryAccessException, this section cannot be written as bytes.
        """
        address = self.base + offset
        raise MemoryAccessException("Cannot write raw bytes to instruction section", address, size, 'write')

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Fetch the instruction stored at the given offset.

        :param offset: offset into the section, must be a multiple of four
        :return: the instruction at index offset // 4
        :raises MemoryAccessException: if offset is not a multiple of four
        """
        index, misalignment = divmod(offset, 4)
        if misalignment == 0:
            return self.instructions[index]
        raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')
|
||||
|
@ -0,0 +1,202 @@
|
||||
from typing import Union
|
||||
from ctypes import c_int32, c_uint32
|
||||
|
||||
|
||||
class Int32:
    """
    A signed 32 bit integer backed by a ctypes.c_int32.

    Binary operators accept another Int32 (or subclass) or a plain int and return a new instance
    of the left operand's class; the ctypes storage truncates every result to 32 bits.
    Comparison operators return plain booleans.
    """
    _type = c_int32
    __slots__ = ('_val',)

    def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0):
        """
        :param val: the initial value. bytes/bytearray are decoded as little endian signed
                    integers, ctypes values and Int32 instances are converted, ints are wrapped.
        :raises RuntimeError: if val is of none of the supported types
        """
        if isinstance(val, (bytes, bytearray)):
            self._val = self.__class__._type(int.from_bytes(val, 'little', signed=True))
        elif isinstance(val, self.__class__._type):
            # already the correct ctypes type, stored without copying
            self._val = val
        elif isinstance(val, (c_uint32, c_int32, Int32)):
            self._val = self.__class__._type(val.value)
        elif isinstance(val, int):
            self._val = self.__class__._type(val)
        else:
            raise RuntimeError(
                "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val)
            )

    @staticmethod
    def _unwrap(other):
        """
        Return the plain python value of an Int32 operand, any other operand is returned as is.
        """
        return other.value if isinstance(other, Int32) else other

    def __add__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value + self._unwrap(other))

    def __sub__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value - self._unwrap(other))

    def __mul__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value * self._unwrap(other))

    def __truediv__(self, other):
        # there are no fractions here, true division is the same as floor division
        return self // other

    def __floordiv__(self, other):
        return self.__class__(self.value // self._unwrap(other))

    def __mod__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value % self._unwrap(other))

    def __and__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value & self._unwrap(other))

    def __or__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value | self._unwrap(other))

    def __xor__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value ^ self._unwrap(other))

    def __lshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value << self._unwrap(other))

    def __rshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value >> self._unwrap(other))

    def __eq__(self, other: Union['Int32', int]):
        return self.value == self._unwrap(other)

    def __neg__(self):
        return self.__class__(-self._val.value)

    def __abs__(self):
        return self.__class__(abs(self.value))

    def __bytes__(self):
        """
        The value as four little endian bytes.
        """
        # bytes(obj) raises a TypeError unless __bytes__ returns a real bytes object,
        # to_bytes() hands out a bytearray, so it is converted here
        return bytes(self.to_bytes(4))

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, self.value)

    def __str__(self):
        return str(self.value)

    def __format__(self, format_spec):
        return self.value.__format__(format_spec)

    def __hash__(self):
        return hash(self.value)

    def __gt__(self, other):
        return self.value > self._unwrap(other)

    def __lt__(self, other):
        return self.value < self._unwrap(other)

    def __le__(self, other):
        return self.value <= self._unwrap(other)

    def __ge__(self, other):
        return self.value >= self._unwrap(other)

    def __bool__(self):
        return bool(self.value)

    def __cmp__(self, other):
        """
        Three way comparison.

        :return: -1, 0 or 1 if this value is smaller than, equal to or greater than other
        """
        # python 3 ints have no __cmp__ to delegate to, the result is computed directly
        other = self._unwrap(other)
        return (self.value > other) - (self.value < other)

    # right handed binary operators

    def __radd__(self, other):
        return self + other

    def __rsub__(self, other):
        return self.__class__(other) - self

    def __rmul__(self, other):
        return self * other

    def __rtruediv__(self, other):
        return self.__class__(other) // self

    def __rfloordiv__(self, other):
        return self.__class__(other) // self

    def __rmod__(self, other):
        return self.__class__(other) % self

    def __rand__(self, other):
        return self.__class__(other) & self

    def __ror__(self, other):
        return self.__class__(other) | self

    def __rxor__(self, other):
        return self.__class__(other) ^ self

    @property
    def value(self):
        """
        The stored value as a plain python int
        """
        return self._val.value

    def unsigned(self) -> 'UInt32':
        """
        Convert this value into a UInt32
        """
        return UInt32(self)

    def to_bytes(self, bytes: int = 4) -> bytearray:
        """
        Encode the unsigned representation of this value as little endian bytes.

        :param bytes: the number of bytes to produce
        :return: a bytearray of the requested length
        """
        return bytearray(self.unsigned_value.to_bytes(bytes, 'little'))

    def signed(self) -> 'Int32':
        """
        Convert this value into an Int32, an instance that is exactly an Int32 is returned as is.
        """
        if self.__class__ == Int32:
            return self
        return Int32(self)

    @property
    def unsigned_value(self):
        """
        The stored value reinterpreted as an unsigned 32 bit python int
        """
        return c_uint32(self.value).value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        Shift right on the unsigned representation, i.e. without replicating the sign bit.

        :param ammount: the number of bits to shift by
        """
        ammount = self._unwrap(ammount)
        return self.__class__((self.value % 0x100000000) >> ammount)

    def __int__(self):
        return self.value

    def __hex__(self):
        return hex(self.value)
|
||||
|
||||
|
||||
class UInt32(Int32):
    """
    Unsigned sibling of Int32, the value is stored in a ctypes.c_uint32.
    """
    _type = c_uint32

    def unsigned(self) -> 'UInt32':
        """
        Already unsigned, so this instance itself is returned
        """
        return self

    @property
    def unsigned_value(self):
        """
        The stored value, which is unsigned already
        """
        raw = self._val
        return raw.value

    def shift_right_logical(self, ammount: Union['Int32', int]):
        """
        For unsigned values this is the ordinary right shift operator.

        :param ammount: the number of bits to shift by
        """
        shifted = self >> ammount
        return shifted
|
@ -0,0 +1,88 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from ..colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE
|
||||
from ..helpers import format_bytes
|
||||
from . import MemoryFlags, T_AbsoluteAddress, InstructionContext, T_RelativeAddress, Instruction
|
||||
|
||||
|
||||
@dataclass
class MemorySection(ABC):
    """
    Abstract base of a named region of memory, described by base address, size, flags, owner and
    instruction context. Subclasses implement the byte and instruction accessors.
    """
    name: str
    flags: MemoryFlags
    size: int
    base: T_AbsoluteAddress
    owner: str
    context: InstructionContext

    @property
    def end(self):
        """
        base + size of this section
        """
        return self.base + self.size

    @abstractmethod
    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Abstract: read size bytes starting at offset
        """

    @abstractmethod
    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Abstract: write data of the given size starting at offset
        """

    @abstractmethod
    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Abstract: read the instruction at offset
        """

    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
        """
        Print a part of this section to stdout.

        Executable sections are printed one instruction per line (bytes_per_row is forced to 4),
        all other sections as rows of bytes rendered by format_bytes.

        :param start: section relative offset to start at. If end is None, this is the highlighted
                      position and a window of rows // 2 rows before and after it is shown instead
        :param end: section relative offset to stop at
        :param fmt: passed through to format_bytes
        :param bytes_per_row: number of bytes printed per row
        :param rows: size of the window, only used when end is None
        :param group: passed through to format_bytes, also used to compute the highlighted group
        """
        is_code = self.flags.executable
        if is_code:
            bytes_per_row = 4

        highlight = None
        if end is None:
            # no explicit range: show a window around start and remember start as the highlight
            half_window = bytes_per_row * (rows // 2)
            highlight = start
            end = min(start + half_window, self.size - 1)
            start = max(0, start - half_window)

        if is_code:
            header = "{}, viewing {} instructions:".format(self, (end - start) // 4)
            print(FMT_MEM + header + FMT_NONE)

            for addr in range(start, end, 4):
                if addr == highlight:
                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
                line = "0x{:04x}: {}{}".format(self.base + addr, self.read_ins(addr), FMT_NONE)
                print(line)
            return

        header = "{}, viewing {} bytes:".format(self, (end - start))
        print(FMT_MEM + header + FMT_NONE)

        # everything below aligned_end is printed in full rows, the rest as one shorter row
        remainder = end % bytes_per_row
        aligned_end = end - remainder

        for addr in range(start, aligned_end, bytes_per_row):
            hi_ind = -1 if highlight is None else (highlight - addr) // group
            row = format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind)
            print("0x{:04x}: {}{}".format(self.base + addr, row, FMT_NONE))

        if aligned_end != end:
            hi_ind = -1 if highlight is None else (highlight - aligned_end) // group
            row = format_bytes(self.read(aligned_end, remainder), fmt, group, hi_ind)
            print("0x{:04x}: {}{}".format(self.base + aligned_end, row, FMT_NONE))

    def dump_all(self, *args, **kwargs):
        """
        Dump the range from 0 to self.size, remaining arguments are forwarded to dump
        """
        self.dump(0, self.size, *args, **kwargs)

    def __repr__(self):
        """
        Class name, section name, base address, size, flags and owner in one line
        """
        template = "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})"
        return template.format(
            self.__class__.__name__, self.name, self.base, self.size, self.flags, self.owner
        )
|
@ -0,0 +1,104 @@
|
||||
from typing import List, Optional, Set
|
||||
|
||||
from ..colors import FMT_RED, FMT_BOLD, FMT_NONE, FMT_MEM
|
||||
from ..helpers import get_section_base_name
|
||||
from . import InstructionContext, T_AbsoluteAddress, MemorySection
|
||||
|
||||
|
||||
class Program:
    """
    This represents a collection of sections which together form an executable program

    When you want to create a program which can be located anywhere in memory, set base to None,
    this signals the other components, that this is relocatable. Set the base of each section to
    the offset in the program, and everything will be taken care of for you.
    """
    name: str
    context: InstructionContext
    global_labels: Set[str]
    relative_labels: Set[str]
    sections: List[MemorySection]
    base: Optional[T_AbsoluteAddress]
    is_loaded: bool

    @property
    def size(self):
        """
        The distance from the program base to the end of the last section.

        A program without sections has size 0. Without a base, the end of the last section is
        returned as is.
        """
        if not self.sections:
            return 0
        last = self.sections[-1]
        if self.base is None:
            return last.base + last.size
        return (last.base - self.base) + last.size

    def __init__(self, name: str, base: Optional[int] = None):
        """
        :param name: the name of the program
        :param base: the address the program expects to be loaded at, None if it is relocatable
        """
        self.name = name
        self.context = InstructionContext()
        self.sections = []
        self.global_labels = set()
        self.relative_labels = set()
        self.base = base
        self.is_loaded = False

    def add_section(self, sec: MemorySection):
        """
        Add a section to the program, the section list is kept sorted by base address.

        A warning is printed if the program has a base and the section is located before it.

        :param sec: the section to add
        """
        # print a warning when a section is located before the programs base
        if self.base is not None:
            if sec.base < self.base:
                print(
                    FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format(
                        sec, self.name, self.base
                    ) + FMT_NONE)

        self.sections.append(sec)
        # keep section list ordered
        self.sections.sort(key=lambda section: section.base)

    def __repr__(self):
        """
        Class name, program name, global labels, section names and base in one line
        """
        # every value needs its own placeholder, otherwise the global labels end up labeled
        # as sections, the section names as base and the base is not shown at all
        return "{}(name={},globals={},sections={},base={})".format(
            self.__class__.__name__, self.name, self.global_labels,
            [s.name for s in self.sections], self.base
        )

    @property
    def entrypoint(self):
        """
        The address execution should start at.

        This is the label '_start' if present, otherwise the label 'main', otherwise the base of
        the first executable section whose base name is '.text'. None if none of these exists.
        """
        for label in ('_start', 'main'):
            if label in self.context.labels:
                return self.context.labels.get(label)
        for sec in self.sections:
            if get_section_base_name(sec.name) == '.text' and sec.flags.executable:
                return sec.base
        return None

    def loaded_trigger(self, at_addr: T_AbsoluteAddress):
        """
        This trigger is called when the binary is loaded and its final address in memory is determined

        This will do a small sanity check to prevent programs loading twice, or at addresses they don't
        expect to be loaded.

        Then it will finalize all relative symbols defined in it to point to the correct addresses.

        :param at_addr: the address where the program will be located
        :raises RuntimeError: if the program was already loaded at a different address
        """
        if self.is_loaded:
            if at_addr != self.base:
                raise RuntimeError("Program loaded twice at different addresses! This will probably break things!")
            return

        if self.base is not None and self.base != at_addr:
            print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, '
                            'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE)

        # check if we are relocating
        if self.base != at_addr:
            offset = at_addr if self.base is None else at_addr - self.base

            # move all sections by the offset
            for sec in self.sections:
                sec.base += offset

            # move all relative symbols by the offset
            for name in self.relative_labels:
                self.context.labels[name] += offset

            self.base = at_addr
            self.context.base_address = at_addr
|
@ -0,0 +1,58 @@
|
||||
import os
from abc import abstractmethod, ABC
from typing import Iterator, List, Tuple, Union

from . import T_ParserOpts, Program
|
||||
|
||||
|
||||
class ProgramLoader(ABC):
    """
    A program loader is always specific to a given source file. It is a place to store all state
    concerning the parsing and loading of that specific source file, including options.
    """

    def __init__(self, source_path: str, options: T_ParserOpts):
        """
        :param source_path: the path to the source file
        :param options: the parser options for this file
        """
        self.source_path = source_path
        self.options = options
        # the last path component, i.e. the file name without its directories
        self.filename = os.path.split(self.source_path)[-1]

    @classmethod
    @abstractmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Return confidence that the file located at source_path
        should be parsed and loaded by this loader

        :param source_path: the path of the source file
        :return: the confidence that this file belongs to this parser
        """
        pass

    @classmethod
    @abstractmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
        """
        parse command line args into an options dictionary

        :param argv: the command line args list
        :return: all remaining command line args and the parser options object
        """
        pass

    @classmethod
    def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader':
        """
        Instantiate a loader for the given source file with the required arguments

        :param source_path: the path to the source file
        :param options: the parsed options (guaranteed to come from this class's get_options method)
        :return: An instance of a ProgramLoader for the specified source
        """
        return cls(source_path, options)

    @abstractmethod
    def parse(self) -> Union[Program, Iterator[Program]]:
        """
        Abstract: parse the source file of this loader.

        :return: according to the annotation, a single Program or an iterator of Programs
        """
        pass
|
@ -0,0 +1,26 @@
|
||||
from typing import Union, Tuple
|
||||
|
||||
from . import Instruction, T_RelativeAddress, InstructionContext
|
||||
from ..helpers import parse_numeric_argument
|
||||
|
||||
|
||||
class SimpleInstruction(Instruction):
    """
    A plain Instruction implementation that keeps its arguments as strings and resolves labels
    through an InstructionContext.
    """

    def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]],
                 context: InstructionContext, addr: T_RelativeAddress):
        """
        :param name: the name of the instruction
        :param args: up to three string arguments
        :param context: the context used to resolve labels
        :param addr: the address of this instruction, handed to the context when resolving labels
        """
        self.name = name
        self.args = args
        self.addr = addr
        self.context = context

    def get_imm(self, num: int) -> int:
        """
        Get argument num as an immediate.

        The argument is resolved as a label first, only if that yields None it is parsed with
        parse_numeric_argument.
        """
        raw = self.args[num]
        resolved = self.context.resolve_label(raw, self.addr)
        return parse_numeric_argument(raw) if resolved is None else resolved

    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        Get the immediate at position num + 1 together with the register at position num
        """
        immediate = self.get_imm(num + 1)
        register = self.get_reg(num)
        return immediate, register

    def get_reg(self, num: int) -> str:
        """
        Get argument num unchanged, as a register name
        """
        register = self.args[num]
        return register
|
||||
|
Loading…
Reference in New Issue