Merge pull request #5 from AntonLydike/assembly-parser-rework

- Completely revamped parsing of readable assembly
 - Completely revamped internal types
 - Added hard and correct 32 bit integer handling in registers
This commit is contained in:
Anton Lydike 2022-03-31 22:47:55 +02:00 committed by GitHub
commit 4b77ce05a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
64 changed files with 2838 additions and 1884 deletions

1
.gitignore vendored
View File

@ -3,3 +3,4 @@ __pycache__
.mypy_cache
dist/
riscemu.egg-info
build/

4
.idea/riscemu.iml generated
View File

@ -2,7 +2,11 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/test" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/venv" />
<excludeFolder url="file://$MODULE_DIR$/dist" />
<excludeFolder url="file://$MODULE_DIR$/.mypy_cache" />
<excludeFolder url="file://$MODULE_DIR$/riscemu.egg-info" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />

View File

@ -12,7 +12,7 @@ sphinx:
# Optionally set the version of Python and requirements required to build your docs
python:
version: 3.7
version: "3.7"
system_packages: true
install:
- requirements: sphinx-docs/requirements.txt

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2021 Anton Lydike
Copyright (c) 2021-2022 Anton Lydike
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -8,7 +8,7 @@ or [riscemu.datenvorr.at](https://riscemu.datenvorr.at/index.html).
This emulator contains:
* RISC-V Assembly parser
* RISC-V Assembly loader
* Emulation for parts of the basic RISC-V instruction set
* Emulation for most parts of the basic RISC-V instruction set and the M and A extensions
* Naive memory emulator
* Basic implementation of some syscalls
* A debugging environment
@ -97,7 +97,6 @@ generate and make all doc files for you. Finally, you can open the docs locall b
* RISC-V reference card: https://www.cl.cam.ac.uk/teaching/1617/ECAD+Arch/files/docs/RISCVGreenCardv8-20151013.pdf
## TODO:
* Currently registers don't enforce 32 bit (no overflows etc)
* Correctly handle 12 and 20 bit immediate (currently not limited to bits at all)
* Add a cycle limit to the options and CPU to catch infinite loops
* Move away from `print` and use the `logging` module (`logging.getLogger`) instead

View File

@ -6,7 +6,7 @@ echo "Generating docs!"
if ! command -v 'sphinx-apidoc'; then
source venv/bin/activate
pip install sphinx
pip install -r sphinx-docs/requirements.txt
fi

View File

@ -1,189 +1,114 @@
"""
RiscEmu (c) 2021 Anton Lydike
RiscEmu (c) 2021-2022 Anton Lydike
SPDX-License-Identifier: MIT
This file contains the CPU logic (not the individual instruction sets). See instructions/InstructionSet.py for more info
This file contains the CPU logic (not the individual instruction sets). See instructions/instruction_set.py for more info
on them.
"""
import sys
from typing import Tuple, List, Dict, Callable, Type
from .Tokenizer import RiscVTokenizer
from .Executable import MemoryFlags
from .Syscall import SyscallInterface, get_syscall_symbols
from .Exceptions import RiscemuBaseException, LaunchDebuggerException
from .MMU import MMU
from .Config import RunConfig
from .Registers import Registers
from .debug import launch_debug_session
from .colors import FMT_CPU, FMT_NONE, FMT_ERROR
import typing
from typing import List, Type
import riscemu
import typing
from .config import RunConfig
from .MMU import MMU
from .colors import FMT_CPU, FMT_NONE
from .debug import launch_debug_session
from .types.exceptions import RiscemuBaseException, LaunchDebuggerException
from .syscall import SyscallInterface, get_syscall_symbols
from .types import CPU, ProgramLoader, Int32, BinaryDataMemorySection
from .parser import AssemblyFileLoader
if typing.TYPE_CHECKING:
from . import Executable, LoadedExecutable, LoadedInstruction
from .instructions.InstructionSet import InstructionSet
from .instructions.instruction_set import InstructionSet
class CPU:
class UserModeCPU(CPU):
"""
This class represents a single CPU. It holds references to its mmu, registers and syscall interrupt handler.
It is initialized with a configuration and a list of instruction sets.
"""
INS_XLEN = 1
def __init__(self, conf: RunConfig, instruction_sets: List[Type['riscemu.InstructionSet']]):
def __init__(self, instruction_sets: List[Type['riscemu.InstructionSet']], conf: RunConfig):
"""
Creates a CPU instance.
:param conf: An instance of the current RunConfiguration
:param instruction_sets: A list of instruction set classes. These must inherit from the InstructionSet class
"""
# setup CPU states
self.pc = 0
self.cycle = 0
self.exit: bool = False
self.exit_code: int = 0
self.conf = conf
self.active_debug = False # if a debugging session is currently runnign
super().__init__(MMU(), instruction_sets, conf)
self.stack: typing.Optional['riscemu.LoadedMemorySection'] = None
self.exit_code = 0
# setup MMU, registers and syscall handlers
self.mmu = MMU(conf)
self.regs = Registers(conf)
# setup syscall interface
self.syscall_int = SyscallInterface()
# load all instruction sets
self.instruction_sets: List[riscemu.InstructionSet] = list()
self.instructions: Dict[str, Callable[[LoadedInstruction], None]] = dict()
for set_class in instruction_sets:
ins_set = set_class(self)
self.instructions.update(ins_set.load())
self.instruction_sets.append(ins_set)
# add global syscall symbols, but don't overwrite any user-defined symbols
syscall_symbols = get_syscall_symbols()
syscall_symbols.update(self.mmu.global_symbols)
self.mmu.global_symbols.update(syscall_symbols)
# provide global syscall symbols if option is set
if conf.include_scall_symbols:
self.mmu.global_symbols.update(get_syscall_symbols())
def get_tokenizer(self, tokenizer_input):
"""
Returns a tokenizer that respects the language of the CPU
:param tokenizer_input: an instance of the RiscVTokenizerInput class
"""
return RiscVTokenizer(tokenizer_input, self.all_instructions())
def load(self, e: riscemu.Executable):
"""
Load an executable into Memory
"""
return self.mmu.load_bin(e)
def run_loaded(self, le: 'riscemu.LoadedExecutable'):
"""
Run a loaded executable
"""
self.pc = le.run_ptr
if self.conf.stack_size > 0:
self.stack = self.mmu.allocate_section("stack", self.conf.stack_size, MemoryFlags(False, False))
self.regs.set('sp', self.stack.base + self.stack.size)
print(FMT_CPU + '[CPU] Allocated {} bytes of stack'.format(self.stack.size) + FMT_NONE)
print(FMT_CPU + '[CPU] Started running from 0x{:08X} ({})'.format(le.run_ptr, le.name) + FMT_NONE)
self._run()
def continue_from_debugger(self, verbose=True):
"""
called from the debugger to continue running
:param verbose: If True, will print each executed instruction to STDOUT
"""
self._run(verbose)
def step(self):
def step(self, verbose=False):
"""
Execute a single instruction, then return.
"""
if self.exit:
if self.halted:
print(FMT_CPU + "[CPU] Program exited with code {}".format(self.exit_code) + FMT_NONE)
else:
try:
self.cycle += 1
ins = self.mmu.read_ins(self.pc)
print(FMT_CPU + " Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins))
self.pc += self.INS_XLEN
self.run_instruction(ins)
except LaunchDebuggerException:
print(FMT_CPU + "[CPU] Returning to debugger!" + FMT_NONE)
except RiscemuBaseException as ex:
self.pc -= self.INS_XLEN
print(ex.message())
return
launch_debugger = False
def _run(self, verbose=False):
if self.pc <= 0:
return False
ins = None
try:
while not self.exit:
self.cycle += 1
ins = self.mmu.read_ins(self.pc)
if verbose:
print(FMT_CPU + " Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins))
self.pc += self.INS_XLEN
self.run_instruction(ins)
self.cycle += 1
ins = self.mmu.read_ins(self.pc)
if verbose:
print(FMT_CPU + " Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins))
self.pc += self.INS_XLEN
self.run_instruction(ins)
except RiscemuBaseException as ex:
if not isinstance(ex, LaunchDebuggerException):
print(FMT_ERROR + "[CPU] excpetion caught at 0x{:08X}: {}:".format(self.pc - 1, ins) + FMT_NONE)
if isinstance(ex, LaunchDebuggerException):
# if the debugger is active, raise the exception to
if self.debugger_active:
raise ex
print(FMT_CPU + '[CPU] Debugger launch requested!' + FMT_NONE)
launch_debugger = True
else:
print(ex.message())
self.pc -= self.INS_XLEN
ex.print_stacktrace()
print(FMT_CPU + '[CPU] Halting due to exception!' + FMT_NONE)
self.halted = True
if self.active_debug:
print(FMT_CPU + "[CPU] Returning to debugger!" + FMT_NONE)
return
if self.conf.debug_on_exception:
launch_debug_session(self, self.mmu, self.regs, "Exception encountered, launching debug:")
if launch_debugger:
launch_debug_session(self)
if self.exit:
print()
print(FMT_CPU + "Program exited with code {}".format(self.exit_code) + FMT_NONE)
sys.exit(self.exit_code)
else:
print()
print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE)
def run(self, verbose=False):
while not self.halted:
self.step(verbose)
def run_instruction(self, ins: 'LoadedInstruction'):
"""
Execute a single instruction
print(FMT_CPU + "[CPU] Program exited with code {}".format(self.exit_code) + FMT_NONE)
:param ins: The instruction to execute
def setup_stack(self, stack_size=1024 * 4) -> bool:
"""
if ins.name in self.instructions:
self.instructions[ins.name](ins)
else:
# this should never be reached, as unknown instructions are imparseable
raise RuntimeError("Unknown instruction: {}".format(ins))
def all_instructions(self) -> List[str]:
Create program stack and populate stack pointer
:param stack_size: the size of the required stack, defaults to 4Kib
:return:
"""
Return a list of all instructions this CPU can execute.
"""
return list(self.instructions.keys())
def __repr__(self):
"""
Returns a representation of the CPU and some of its state.
"""
return "{}(pc=0x{:08X}, cycle={}, exit={}, instructions={})".format(
self.__class__.__name__,
self.pc,
self.cycle,
self.exit,
" ".join(s.name for s in self.instruction_sets)
stack_sec = BinaryDataMemorySection(
bytearray(stack_size),
'.stack',
None, # FIXME: why does a binary data memory section require a context?
'',
0
)
if not self.mmu.load_section(stack_sec, fixed_position=False):
return False
self.regs.set('sp', Int32(stack_sec.base + stack_sec.size))
return True
@classmethod
def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
return [AssemblyFileLoader]

View File

@ -1,319 +0,0 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
This file holds Executable and LoadedExecutable classes as well as loading and some linking code.
FIXME: refactor this code into multiple files
"""
from dataclasses import dataclass, field
from typing import Dict, List, Tuple, Union, Optional
from .Exceptions import *
from .helpers import *
from math import log
import typing
if typing.TYPE_CHECKING:
from .Tokenizer import RiscVInstructionToken
@dataclass(frozen=True)
class MemoryFlags:
    """
    Immutable access flags attached to a memory section.

    read_only is consulted before writes and executable before instruction reads
    by the loaded sections that carry these flags.
    """
    read_only: bool
    executable: bool

    def __repr__(self):
        """
        Short form such as ``MemoryFlags(rw,-)`` or ``MemoryFlags(ro,x)``
        """
        access = 'ro' if self.read_only else 'rw'
        exec_mark = 'x' if self.executable else '-'
        return "{}({},{})".format(self.__class__.__name__, access, exec_mark)
@dataclass
class MemorySection:
    """
    A named section of a not yet loaded executable, collected chunk by chunk.

    :param name: the name of the section
    :param flags: the MemoryFlags protecting this section
    :param size: the number of bytes added so far (maintained by add)
    :param content: the added chunks, in insertion order
    """
    name: str
    flags: 'MemoryFlags'
    size: int = 0
    content: List[bytearray] = field(default_factory=list)

    def add(self, data: bytearray):
        """
        Append a chunk of bytes to this section and grow its size by the chunk length

        :param data: the bytes to append
        """
        self.content.append(data)
        self.size += len(data)

    def continuous_content(self, parent: 'LoadedExecutable'):
        """
        converts the content into one continuous bytearray

        A new bytearray is built on every call. Concatenating onto the first chunk with
        ``+=`` would extend that chunk in place, corrupting the stored content and
        duplicating data on every further call.

        :param parent: not used by this implementation
        :return: a fresh bytearray holding all chunks back to back (empty if nothing was added)
        """
        return bytearray().join(self.content)
@dataclass
class InstructionMemorySection(MemorySection):
    """
    A memory section that collects instruction tokens instead of raw bytes.

    Every added instruction grows the section size by one.
    """
    content: List['RiscVInstructionToken'] = field(default_factory=list)

    def add_insn(self, insn: 'RiscVInstructionToken'):
        """
        Append a single instruction token to this section

        :param insn: the instruction token to append
        """
        self.size += 1
        self.content.append(insn)

    def continuous_content(self, parent: 'LoadedExecutable'):
        """
        Turn every stored token into a LoadedInstruction bound to parent

        :param parent: the loaded executable handed to each created LoadedInstruction
        :return: a list of LoadedInstruction objects, in insertion order
        """
        loaded = []
        for token in self.content:
            loaded.append(LoadedInstruction(token.instruction, token.args, parent))
        return loaded
@dataclass()
class Executable:
    """
    The result of parsing one assembly source, before it is placed in memory.

    run_ptr and the values of symbols are (section name, offset) pairs; the loader
    translates them into addresses once the sections have a base.
    """
    run_ptr: Tuple[str, int]
    sections: Dict[str, MemorySection]
    symbols: Dict[str, Tuple[str, int]]
    exported_symbols: List[str]
    name: str

    def __repr__(self):
        """
        Lists the section names, the symbol names, the run pointer and the exported symbols
        """
        section_names = " ".join(self.sections.keys())
        symbol_names = " ".join(self.symbols.keys())
        exported = ",".join(self.exported_symbols)
        template = "{}(sections = {}, symbols = {}, run_ptr = {}, globals={})"
        return template.format(self.__class__.__name__, section_names, symbol_names, self.run_ptr, exported)
### LOADING CODE
@dataclass(frozen=True)
class LoadedInstruction:
    """
    An instruction that lives in memory, together with the binary it was loaded from.

    The owning binary is consulted whenever an argument may be a symbol.
    """
    name: str
    args: List[str]
    bin: 'LoadedExecutable'

    def _arg_at(self, num: int) -> str:
        """
        Fetch the raw argument string at position num

        :raises ParseException: if the instruction has no argument at that position
        """
        if num >= len(self.args):
            raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args))
        return self.args[num]

    def get_imm(self, num: int):
        """
        Read argument num as an immediate value

        A symbol known to the owning binary is resolved through it, anything else is
        handed to parse_numeric_argument.
        """
        raw = self._arg_at(num)
        # symbols take precedence over numeric parsing
        if self.bin.has_symb(raw):
            return self.bin.lookup_symbol(raw)
        return parse_numeric_argument(raw)

    def get_imm_reg(self, num: int):
        """
        Read argument num, written as imm(reg), as a pair (immediate, register)
        """
        raw = self._arg_at(num)
        ASSERT_IN("(", raw)
        # drop the last character, then split into the part before and after "("
        imm, reg = raw[:-1].split("(")
        if self.bin.has_symb(imm):
            return self.bin.lookup_symbol(imm), reg
        return parse_numeric_argument(imm), reg

    def get_reg(self, num: int):
        """
        Read argument num as a register; the argument string is returned unchanged
        """
        return self._arg_at(num)

    def __repr__(self):
        arg_list = ", ".join(self.args)
        return "{} {}".format(self.name, arg_list)
@dataclass(frozen=True)
class LoadedMemorySection:
    """
    A section which is loaded into memory

    The offsets taken by read, read_instruction and write are relative to base.
    content is either a list of loaded instructions or a bytearray.
    """
    name: str
    base: int
    size: int
    content: Union[List['LoadedInstruction'], bytearray] = field(repr=False)
    flags: 'MemoryFlags'
    owner: str

    def read(self, offset: int, size: int):
        """
        Read size items starting at offset

        :param offset: start of the read, relative to the section base
        :param size: number of items to read
        :return: the slice content[offset:offset + size]
        :raises MemoryAccessException: if the range is not fully inside the section
        """
        if offset < 0:
            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, size, 'read')
        if offset + size > self.size:
            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
                                        size, 'read')
        return self.content[offset: offset + size]

    def read_instruction(self, offset):
        """
        Read the single instruction stored at offset

        :param offset: position of the instruction, relative to the section base
        :return: content[offset]
        :raises MemoryAccessException: if the section is not executable or offset is outside of it
        """
        if not self.flags.executable:
            raise MemoryAccessException('Section not executable!', self.base + offset, 1, 'read exec')
        if offset < 0:
            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'read exec')
        if offset >= self.size:
            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
                                        1, 'read exec')
        return self.content[offset]

    def write(self, offset, size, data):
        """
        Write the first size items of data into the section, starting at offset

        The whole range is validated before the first item is written, so a write that
        starts inside the section but runs past its end no longer modifies part of the
        section before failing.

        :param offset: start of the write, relative to the section base
        :param size: number of items to write
        :param data: the items to write, data[0] goes to offset
        :raises MemoryAccessException: if the section is read only or the range is not fully inside it
        """
        if self.flags.read_only:
            raise MemoryAccessException('Section not writeable {}'.format(self.name), self.base + offset, size, 'write')
        if offset < 0:
            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, size, 'write')
        # same end-of-range check as read(); the first condition keeps rejecting offset == size
        if offset >= self.size or offset + size > self.size:
            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
                                        size, 'write')
        for i in range(size):
            self.content[offset + i] = data[i]

    def dump(self, at_addr=None, fmt='hex', max_rows=10, group=4, bytes_per_row=16, all=False):
        """
        Print a view of this section to stdout

        :param at_addr: absolute address to centre the view on and to highlight; defaults to the
            section base without a highlight
        :param fmt: format name passed on to format_bytes
        :param max_rows: maximum number of rows (or instructions) printed unless all is set
        :param group: group size passed on to format_bytes
        :param bytes_per_row: number of bytes printed per row
        :param all: print the whole section instead of a window
        """
        highlight = -1
        if at_addr is None:
            at_addr = self.base
        else:
            highlight = at_addr - self.base

        at_off = at_addr - self.base
        start = max(align_addr(at_off - ((max_rows * bytes_per_row) // 2), 8) - 8, 0)
        if all:
            end = self.size
            start = 0
        else:
            end = min(start + (max_rows * bytes_per_row), self.size)

        # pad addresses to the number of hex digits derived from the highest address shown
        fmt_str = " 0x{:0" + str(ceil(log(self.base + end, 16))) + "X}: {}"

        if self.flags.executable:
            # this section holds instructions!
            start = 0 if all else max(at_off - (max_rows // 2), 0)
            end = self.size if all else min(self.size, start + max_rows)
            print(FMT_MEM + "{}, viewing {} instructions:".format(
                self, end - start
            ) + FMT_NONE)
            for i in range(start, end):
                if i == highlight:
                    ins = FMT_UNDERLINE + FMT_ORANGE + repr(self.content[i]) + FMT_NONE
                else:
                    ins = repr(self.content[i])
                print(fmt_str.format(self.base + i, ins))
        else:
            print(FMT_MEM + "{}, viewing {} bytes:".format(
                self, end - start
            ) + FMT_NONE)
            for i in range(0, end - start, bytes_per_row):
                data = self.content[start + i: min(start + i + bytes_per_row, end)]
                # NOTE(review): the upper bound is inclusive, so an address on a row boundary also
                # matches the previous row - confirm whether a strict "<" was intended
                if start + i <= highlight <= start + i + bytes_per_row:
                    # do highlight here!
                    hi_ind = (highlight - start - i) // group
                    print(fmt_str.format(self.base + start + i, format_bytes(data, fmt, group, highlight=hi_ind)))
                else:
                    print(fmt_str.format(self.base + start + i, format_bytes(data, fmt, group)))
        # NOTE(review): assumed to run for both branches above - confirm against the original layout
        if end == self.size:
            print(FMT_MEM + "End of section!" + FMT_NONE)
        else:
            print(FMT_MEM + "More bytes ..." + FMT_NONE)

    def __repr__(self):
        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
            self.__class__.__name__,
            self.name,
            self.base,
            self.size,
            self.flags,
            self.owner
        )
class LoadedExecutable:
    """
    An executable that has been placed in memory starting at base_addr.

    Construction lays the sections of the executable out one after another, turns the
    section relative symbols into addresses and resolves the run pointer.
    The symbol table is kept as a plain dictionary on this object, it is not part of the
    emulated memory.
    """
    name: str
    base_addr: int
    sections_by_name: Dict[str, LoadedMemorySection]
    sections: List[LoadedMemorySection]
    symbols: Dict[str, int]
    run_ptr: int
    exported_symbols: Dict[str, int]
    global_symbol_table: Dict[str, int]

    def __init__(self, exe: Executable, base_addr: int, global_symbol_table: Dict[str, int]):
        """
        :param exe: the executable to load
        :param base_addr: the address of the first section
        :param global_symbol_table: table consulted for symbols this executable does not define
        """
        self.name = exe.name
        self.base_addr = base_addr
        self.sections = []
        self.sections_by_name = {}
        self.symbols = {}
        self.exported_symbols = {}
        self.global_symbol_table = global_symbol_table

        # place the sections back to back, each new base comes from align_addr
        next_base = base_addr
        for section in exe.sections.values():
            placed = LoadedMemorySection(
                section.name, next_base, section.size, section.continuous_content(self), section.flags, self.name
            )
            self.sections.append(placed)
            self.sections_by_name[placed.name] = placed
            next_base = align_addr(placed.size + next_base)

        # symbols in the pseudo section '_static_' already hold their final value
        for symbol, (section_name, offset) in exe.symbols.items():
            if sec_name_is_static := (section_name == '_static_'):
                self.symbols[symbol] = offset
                continue
            ASSERT_IN(section_name, self.sections_by_name)
            self.symbols[symbol] = self.sections_by_name[section_name].base + offset

        for symbol in exe.exported_symbols:
            self.exported_symbols[symbol] = self.symbols[symbol]

        self.size = next_base - base_addr

        # the run pointer is stored as (section name, offset) as well
        entry_section, entry_offset = exe.run_ptr
        self.run_ptr = self.sections_by_name[entry_section].base + entry_offset

    def lookup_symbol(self, name):
        """
        Resolve a symbol, local symbols win over the global symbol table

        :raises LinkerException: if neither table knows the symbol
        """
        for table in (self.symbols, self.global_symbol_table):
            if name in table:
                return table[name]
        raise LinkerException('Symbol {} not found!'.format(name), (self,))

    def __repr__(self):
        section_names = " ".join(self.sections_by_name.keys())
        return '{}[{}](base=0x{:08X}, size={}bytes, sections={}, run_ptr=0x{:08X})'.format(
            self.__class__.__name__, self.name, self.base_addr, self.size, section_names, self.run_ptr
        )

    def has_symb(self, arg):
        """
        True if arg is a local symbol or present in the global symbol table
        """
        if arg in self.symbols:
            return True
        return arg in self.global_symbol_table

View File

@ -1,193 +0,0 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
This file holds the parser that parses the tokenizer output.
"""
from .helpers import parse_numeric_argument, int_to_bytes
from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags
from .Exceptions import *
from .Tokenizer import RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, RiscVPseudoOpToken
from typing import Dict, Tuple, List, Optional
class ExecutableParser:
    """
    Parses output from the RiscVTokenizer

    Tokens are consumed in order. Instructions, symbols and the data produced by pseudo ops
    are collected per section and bundled into an Executable by parse().
    """
    tokenizer: 'RiscVTokenizer'

    def __init__(self, tokenizer: 'RiscVTokenizer'):
        """
        :param tokenizer: the tokenizer whose tokens are parsed; its name becomes the executable name
        """
        self.instructions: List[RiscVInstructionToken] = list()
        self.symbols: Dict[str, Tuple[str, int]] = dict()
        self.sections: Dict[str, MemorySection] = dict()
        self.tokenizer = tokenizer
        self.active_section: Optional[str] = None
        self.implicit_sections = False
        self.globals: List[str] = list()

    def parse(self) -> Executable:
        """
        parse tokenizer output into an executable

        :return: the parsed executable
        :raise ParseException: Raises a ParseException when invalid input is read
        """
        for token in self.tokenizer.tokens:
            if isinstance(token, RiscVInstructionToken):
                self.parse_instruction(token)
            elif isinstance(token, RiscVSymbolToken):
                self.handle_symbol(token)
            elif isinstance(token, RiscVPseudoOpToken):
                self.handle_pseudo_op(token)
        return self._get_execuable()

    def _get_execuable(self) -> Executable:
        """
        Bundle the collected state into an Executable

        The entry point is the symbol '_start' if defined, otherwise 'main', otherwise the
        beginning of the text section.
        """
        start_ptr = ('text', 0)
        if '_start' in self.symbols:
            start_ptr = self.symbols['_start']
        elif 'main' in self.symbols:
            start_ptr = self.symbols['main']
        return Executable(start_ptr, self.sections, self.symbols, self.globals, self.tokenizer.name)

    def parse_instruction(self, ins: 'RiscVInstructionToken') -> None:
        """
        parses an Instruction token

        If no section was selected yet, the text section is opened implicitly.

        :param ins: the instruction token
        """
        if self.active_section is None:
            self.op_text()
            self.implicit_sections = True

        ASSERT_EQ(self.active_section, 'text')
        sec = self._curr_sec()
        if isinstance(sec, InstructionMemorySection):
            sec.add_insn(ins)
        else:
            raise ParseException("SHOULD NOT BE REACHED")

    def handle_symbol(self, token: 'RiscVSymbolToken'):
        """
        Handle a symbol token (such as 'main:')

        The symbol is recorded as (active section, current size of that section).

        :param token: the symbol token
        """
        ASSERT_NOT_IN(token.name, self.symbols)
        ASSERT_NOT_NULL(self.active_section)
        sec_pos = self._curr_sec().size
        self.symbols[token.name] = (self.active_section, sec_pos)

    def handle_pseudo_op(self, op: 'RiscVPseudoOpToken'):
        """
        Handle a pseudo op token (such as '.word 0xffaabbcc')

        The op is dispatched to the method named 'op_' followed by the name of the op.

        :param op: the pseudo-op token
        :raise ParseException: if no such method exists
        """
        name = 'op_' + op.name
        if hasattr(self, name):
            getattr(self, name)(op)
        else:
            raise ParseException("Unknown pseudo op: {}".format(op), (op,))

    ## Pseudo op implementations:

    def op_section(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .section token

        :param op: The token
        """
        ASSERT_LEN(op.args, 1)
        # drop the first character of the argument (presumably the leading dot, as in '.data')
        name = op.args[0][1:]
        ASSERT_IN(name, ('data', 'rodata', 'text'))
        getattr(self, 'op_' + name)(op)

    def op_text(self, op: 'RiscVPseudoOpToken' = None):
        """
        handles a .text token

        :param op: The token
        """
        self._set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection)

    def op_data(self, op: 'RiscVPseudoOpToken' = None):
        """
        handles a .data token

        :param op: The token
        """
        self._set_sec('data', MemoryFlags(read_only=False, executable=False))

    def op_rodata(self, op: 'RiscVPseudoOpToken' = None):
        """
        handles a .rodata token

        :param op: The token
        """
        self._set_sec('rodata', MemoryFlags(read_only=True, executable=False))

    def op_space(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .space token. Inserts empty space into the current (data or rodata) section

        :param op: The token
        """
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        size = parse_numeric_argument(op.args[0])
        self._curr_sec().add(bytearray(size))

    def _add_string(self, op: 'RiscVPseudoOpToken', terminator: str = ''):
        """
        Shared implementation of .ascii and .asciiz

        Strips the first and last character of the argument (the quotes), resolves escape
        sequences and adds the ascii encoded text plus terminator to the current section.
        """
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        text = op.args[0][1:-1].encode('ascii').decode('unicode_escape')
        self._curr_sec().add(bytearray(text + terminator, 'ascii'))

    def op_ascii(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .ascii token. Inserts ascii encoded text into the current data section

        :param op: The token
        """
        self._add_string(op)

    def op_asciiz(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .asciiz token. Inserts null terminated ascii encoded text into the current data section

        :param op: The token
        """
        self._add_string(op, '\0')

    def op_global(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .global token. Marks the token as global

        :param op: The token
        """
        ASSERT_LEN(op.args, 1)
        name = op.args[0]
        self.globals.append(name)

    def op_set(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .set name, val token. Sets the symbol name to val

        The symbol is stored in the pseudo section '_static_', with val as its offset.

        :param op: The token
        """
        ASSERT_LEN(op.args, 2)
        name = op.args[0]
        val = parse_numeric_argument(op.args[1])
        self.symbols[name] = ('_static_', val)

    def op_align(self, op: 'RiscVPseudoOpToken'):
        """
        handles an align token. Currently a nop (just not implemented fully yet, as linker handles most alignment tasks)

        :param op: The token
        """
        pass

    def op_word(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .word token. Inserts the value as 4 bytes into the current (data or rodata) section

        :param op: The token
        """
        # same section check as the other data emitting ops: raw bytes must not end up in the
        # text section, which holds instruction tokens
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        val = parse_numeric_argument(op.args[0])
        self._curr_sec().add(int_to_bytes(val, 4))

    ## Section handler code

    def _set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection):
        """
        Make name the active section, creating it with cls(name, flags) on first use
        """
        if name not in self.sections:
            self.sections[name] = cls(name, flags)
        self.active_section = name

    def _curr_sec(self):
        """
        :return: the section object of the active section
        """
        return self.sections[self.active_section]

View File

@ -1,22 +1,22 @@
from abc import ABC, abstractmethod
from typing import Optional
from riscemu.types import MemorySection, MemoryFlags, T_RelativeAddress
class IOModule(ABC):
addr: int
size: int
def __init__(self, addr: int, size: int):
self.addr = addr
self.size = size
@abstractmethod
def read(self, addr: int, size: int):
pass
@abstractmethod
def write(self, addr: int, data: bytearray, size: int):
pass
class IOModule(MemorySection, ABC):
def __init__(self, name: str, flags: MemoryFlags, size: int, owner: str = 'system', base: int = 0):
super(IOModule, self).__init__(name, flags, size, base, owner, None)
def contains(self, addr, size: int = 0):
return self.addr <= addr < self.addr + self.size and \
self.addr <= addr + size <= self.addr + self.size
return self.base <= addr < self.base + self.size and \
self.base <= addr + size <= self.base + self.size
def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
bytes_per_row: int = 16, rows: int = 10, group: int = 4):
print(self)
def __repr__(self):
return "{}[{}] at 0x{:0X} (size={}bytes, flags={})".format(
self.__class__.__name__, self.name, self.base, self.size, self.flags
)

View File

@ -1,70 +1,28 @@
from .IOModule import IOModule
from ..priv.Exceptions import InstructionAccessFault
from ..helpers import int_from_bytes
from threading import Thread
import time
from ..types import T_RelativeAddress, Instruction, MemoryFlags, Int32
def _window_loop(textIO: 'TextIO'):
try:
import PySimpleGUI as sg
logs = sg.Text(font="monospace")
col = sg.Column([[logs]], size=(640, 400), scrollable=True)
window = sg.Window("TextIO:{:x}".format(textIO.addr), [[col]])
lines = list()
window.finalize()
textIO.set_sg_window(window)
while True:
e, v = window.read()
if e == sg.WINDOW_CLOSED:
window.close()
textIO.set_sg_window(None)
break
if e == 'putlog':
lines.insert(0, v[0])
logs.update(value='\n'.join(lines) + '\n')
col.contents_changed()
except ImportError:
print("[TextIO] window disabled - please install PySimpleGui!")
textIO.set_sg_window(None)
class TextIO(IOModule):
def __init__(self, addr: int, buflen: int = 128):
super(TextIO, self).__init__(addr, buflen + 4)
def read_ins(self, offset: T_RelativeAddress) -> Instruction:
raise InstructionAccessFault(self.base + offset)
def __init__(self, base: int, buflen: int = 128):
super(TextIO, self).__init__('TextIO', MemoryFlags(False, False), buflen + 4, base=base)
self.buff = bytearray(buflen)
self.current_line = ""
self.sg_window = None
self.start_buffer = list()
self.thread = Thread(target=_window_loop, args=(self,))
self.thread.start()
time.sleep(0.1)
def set_sg_window(self, window):
if self.sg_window is not None and window is not None:
raise Exception("cannot set window twice!")
self.sg_window = window
buff = self.start_buffer
self.start_buffer = None if window is None else list()
for line in buff:
self._present(line)
def read(self, addr: int, size: int) -> bytearray:
raise InstructionAccessFault(addr)
raise InstructionAccessFault(self.base + addr)
def write(self, addr: int, data: bytearray, size: int):
if addr == self.addr:
def write(self, addr: int, size: int, data: bytearray):
if addr == 0:
if size > 4:
raise InstructionAccessFault(addr)
if int_from_bytes(data[0:4]) > 0:
if Int32(data) != 0:
self._print()
return
buff_start = addr - self.addr - 4
buff_start = addr - 4
self.buff[buff_start:buff_start + size] = data[0:size]
def _print(self):
@ -83,10 +41,4 @@ class TextIO(IOModule):
self.current_line += text
def _present(self, text: str):
if self.sg_window is not None:
self.sg_window.write_event_value('putlog', text)
elif self.start_buffer is not None:
self.start_buffer.append(text)
else:
print("[TextIO:{:x}] {}".format(self.addr, text))
print("[TextIO:{:x}] {}".format(self.base, text))

View File

@ -4,17 +4,20 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
from .Config import RunConfig
from .Executable import Executable, LoadedExecutable, LoadedMemorySection, LoadedInstruction, MemoryFlags
from .helpers import align_addr, int_from_bytes
from .Exceptions import OutOfMemoryException, InvalidAllocationException
from typing import Dict, List, Optional, Union
from .colors import *
from typing import Dict, List, Tuple, Optional
from .helpers import align_addr
from .types import Instruction, MemorySection, MemoryFlags, T_AbsoluteAddress, \
Program, InstructionContext, Int32
from .types.exceptions import InvalidAllocationException, MemoryAccessException
class MMU:
"""
The MemoryManagementUnit (handles loading binaries, and reading/writing data)
The MemoryManagementUnit. This provides a unified interface for reading/writing data from/to memory.
It also provides various translations for addresses.
"""
max_size = 0xFFFFFFFF
@ -27,19 +30,14 @@ class MMU:
No single allocation can be bigger than 64 MB
"""
sections: List[LoadedMemorySection]
sections: List[MemorySection]
"""
A list of all loaded memory sections
"""
binaries: List[LoadedExecutable]
programs: List[Program]
"""
A list of all loaded executables
"""
last_bin: Optional[LoadedExecutable] = None
"""
The last loaded executable (the next executable is inserted directly after this one)
A list of all loaded programs
"""
global_symbols: Dict[str, int]
@ -47,79 +45,15 @@ class MMU:
The global symbol table
"""
last_ins_sec: Optional[LoadedMemorySection]
def __init__(self, conf: RunConfig):
def __init__(self):
"""
Create a new MMU, respecting the active RunConfiguration
:param conf: The config to respect
Create a new MMU
"""
self.sections: List[LoadedMemorySection] = list()
self.binaries: List[LoadedExecutable] = list()
self.first_free_addr: int = 0x100
self.conf: RunConfig = conf
self.global_symbols: Dict[str, int] = dict()
self.last_ins_sec = None
self.programs = list()
self.sections = list()
self.global_symbols = dict()
def load_bin(self, exe: Executable) -> LoadedExecutable:
"""
Load an executable into memory
:param exe: the executable to load
:return: A LoadedExecutable
:raises OutOfMemoryException: When all memory is used
"""
# align to 8 byte word
addr = align_addr(self.first_free_addr)
loaded_bin = LoadedExecutable(exe, addr, self.global_symbols)
if loaded_bin.size + addr > self.max_size:
raise OutOfMemoryException('load of executable')
self.binaries.append(loaded_bin)
self.first_free_addr = loaded_bin.base_addr + loaded_bin.size
# read sections into sec dict
for sec in loaded_bin.sections:
self.sections.append(sec)
self.global_symbols.update(loaded_bin.exported_symbols)
print(FMT_MEM + "[MMU] Successfully loaded{}: {}".format(FMT_NONE, loaded_bin))
return loaded_bin
def allocate_section(self, name: str, req_size: int, flag: MemoryFlags):
"""
Used to allocate a memory region (data only). Use `load_bin` if you want to load a binary, this is used for
stack and maybe malloc in the future.
:param name: Name of the section to allocate
:param req_size: The requested size
:param flag: The flags protecting this memory section
:return: The LoadedMemorySection
"""
if flag.executable:
raise InvalidAllocationException('cannot allocate executable section', name, req_size, flag)
if req_size < 0:
raise InvalidAllocationException('Invalid size request', name, req_size, flag)
if req_size > self.max_alloc_size:
raise InvalidAllocationException('Cannot allocate more than {} bytes at a time'.format(self.max_alloc_size),
name, req_size, flag)
base = align_addr(self.first_free_addr)
size = align_addr(req_size)
sec = LoadedMemorySection(name, base, size, bytearray(size), flag, "<runtime>")
self.sections.append(sec)
self.first_free_addr = base + size
return sec
def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]:
def get_sec_containing(self, addr: T_AbsoluteAddress) -> Optional[MemorySection]:
"""
Returns the section that contains the address addr
@ -131,31 +65,27 @@ class MMU:
return sec
return None
def get_bin_containing(self, addr: int) -> Optional[LoadedExecutable]:
for exe in self.binaries:
if exe.base_addr <= addr < exe.base_addr + exe.size:
return exe
def get_bin_containing(self, addr: T_AbsoluteAddress) -> Optional[Program]:
for program in self.programs:
if program.base <= addr < program.base + program.size:
return program
return None
def read_ins(self, addr: int) -> LoadedInstruction:
def read_ins(self, addr: T_AbsoluteAddress) -> Instruction:
"""
Read a single instruction located at addr
:param addr: The location
:return: The Instruction
"""
sec = self.last_ins_sec
if sec is not None and sec.base <= addr < sec.base + sec.size:
return sec.read_instruction(addr - sec.base)
sec = self.get_sec_containing(addr)
self.last_ins_sec = sec
if sec is None:
print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! "
"Have you forgotten an exit syscall or ret statement?" + FMT_NONE)
print(FMT_MEM + "[MMU] Trying to read instruction form invalid region! (read at {}) ".format(addr)
+ "Have you forgotten an exit syscall or ret statement?" + FMT_NONE)
raise RuntimeError("No next instruction available!")
return sec.read_instruction(addr - sec.base)
return sec.read_ins(addr - sec.base)
def read(self, addr: int, size: int) -> bytearray:
def read(self, addr: Union[int, Int32], size: int) -> bytearray:
"""
Read size bytes of memory at addr
@ -163,10 +93,16 @@ class MMU:
:param size: The number of bytes to read
:return: The bytearray at addr
"""
if isinstance(addr, Int32):
breakpoint()
addr = addr.unsigned_value
sec = self.get_sec_containing(addr)
if sec is None:
print(FMT_MEM + "[MMU] Trying to read data form invalid region at 0x{:x}! ".format(addr) + FMT_NONE)
raise MemoryAccessException("region is non-initialized!", addr, size, 'read')
return sec.read(addr - sec.base, size)
def write(self, addr: int, size: int, data):
def write(self, addr: int, size: int, data: bytearray):
"""
Write bytes into memory
@ -176,8 +112,8 @@ class MMU:
"""
sec = self.get_sec_containing(addr)
if sec is None:
print(FMT_MEM + '[MMU] Invalid write into non-initialized section at 0x{:08X}'.format(addr) + FMT_NONE)
raise RuntimeError("No write pls")
print(FMT_MEM + '[MMU] Invalid write into non-initialized region at 0x{:08X}'.format(addr) + FMT_NONE)
raise MemoryAccessException("region is non-initialized!", addr, size, 'write')
return sec.write(addr - sec.base, size, data)
@ -195,7 +131,7 @@ class MMU:
return
sec.dump(addr, *args, **kwargs)
def symbol(self, symb: str):
def label(self, symb: str):
"""
Look up the symbol symb in all local symbol tables (and the global one)
@ -204,14 +140,152 @@ class MMU:
print(FMT_MEM + "[MMU] Lookup for symbol {}:".format(symb) + FMT_NONE)
if symb in self.global_symbols:
print(" Found global symbol {}: 0x{:X}".format(symb, self.global_symbols[symb]))
for b in self.binaries:
if symb in b.symbols:
print(" Found local symbol {}: 0x{:X} in {}".format(symb, b.symbols[symb], b.name))
for bin in self.programs:
if symb in bin.context.labels:
print(" Found local labels {}: 0x{:X} in {}".format(symb, bin.context.labels[symb], bin.name))
def read_int(self, addr: int) -> int:
return int_from_bytes(self.read(addr, 4))
def read_int(self, addr: int) -> Int32:
return Int32(self.read(addr, 4))
def translate_address(self, address: T_AbsoluteAddress) -> str:
    """
    Describe an address relative to the closest label located at or before it

    If several labels share the closest position, ELF marker symbols lose against any other
    name, and section names lose against any name that is not an ELF marker.

    :param address: The address to describe
    :return: "<owner>:<section> at <label> (0x<label value>) + 0x<offset>", or
        "unknown at 0x<address>" if the address is in no section or no label precedes it
    """
    sec = self.get_sec_containing(address)
    if not sec:
        return "unknown at 0x{:0x}".format(address)

    program = self.get_bin_containing(address)
    section_names = {s.name for s in program.sections} if program else set()

    elf_markers = {
        '__global_pointer$', '_fdata', '_etext', '_gp',
        '_bss_start', '_bss_end', '_ftext', '_edata', '_end', '_fbss'
    }

    best_name, best_val = '', None
    for name, val in sec.context.labels.items():
        # labels behind the address can never describe it
        if val > address:
            continue
        if best_val is None or val > best_val:
            # strictly closer to the address than everything seen so far
            best_name, best_val = name, val
        elif val == best_val:
            # same distance: prefer the more meaningful name
            if best_name in elf_markers:
                best_name = name
            elif best_name in section_names and name not in elf_markers:
                best_name = name

    if not best_name:
        return "unknown at 0x{:0x}".format(address)

    return str('{}:{} at {} (0x{:0x}) + 0x{:0x}'.format(
        sec.owner, sec.name, best_name, best_val, address - best_val
    ))
def has_continous_free_region(self, start: int, end: int) -> bool:
    """
    Check whether no known section overlaps the region from start (inclusive) to end (exclusive)

    Only the first section in self.sections that ends after start is inspected, so the result
    relies on the order of self.sections.

    :param start: first address of the region
    :param end: first address behind the region
    :return: True if the region is free, False otherwise
    """
    known = self.sections

    # nothing is loaded, so nothing can be in the way
    if not known:
        return True

    # the region starts behind the last section
    tail = known[-1]
    if tail.base + tail.size <= start:
        return True

    # the first section that does not end before the start decides the outcome
    candidate = next((s for s in known if s.base + s.size > start), None)
    if candidate is None:
        # every section ends before the requested start
        return True
    return candidate.base >= end
def load_program(self, program: Program, align_to: int = 4):
    """
    Load a program and all of its sections into the MMU

    A program with a preset base is placed exactly there; otherwise it is placed at the next
    guaranteed free address, aligned to align_to.

    :param program: The program to load
    :param align_to: Alignment used when the MMU chooses the address itself
    :raises InvalidAllocationException: if the preset base region is already occupied
    """
    if program.base is None:
        at_addr = align_addr(self.get_guaranteed_free_address(), align_to)
    else:
        region_end = program.base + program.size
        if not self.has_continous_free_region(program.base, region_end):
            print(FMT_MEM + "Cannot load program {} into desired space (0x{:0x}-0x{:0x}), area occupied.".format(
                program.name, program.base, region_end
            ) + FMT_NONE)
            raise InvalidAllocationException(
                "Area occupied", program.name, program.size, MemoryFlags(False, True)
            )
        at_addr = program.base

    # trigger the load event so the program can fix up its addresses
    program.loaded_trigger(at_addr)

    # register the program and its sections, then restore the ordering
    self.programs.append(program)
    self.sections += program.sections
    self._update_state()

    # export every label the program declared as global
    exported = {label: program.context.labels[label] for label in program.global_labels}
    self.global_symbols.update(exported)

    # inject reference to global symbol table into program context
    # FIXME: this is pretty unclean and should probably be solved in a better way in the future
    program.context.global_symbol_dict = self.global_symbols
def load_section(self, sec: MemorySection, fixed_position: bool = False) -> bool:
    """
    Add a single section to the MMU

    :param sec: The section to add
    :param fixed_position: If True the section keeps its base and is only added when that
        region is free; if False its base is set to the next free address aligned to 8
    :return: True if the section was added, False if its fixed position is occupied
    """
    if fixed_position and not self.has_continous_free_region(sec.base, sec.base + sec.size):
        print(FMT_MEM + '[MMU] Cannot place section {} at {}, space is occupied!'.format(sec, sec.base))
        return False

    if not fixed_position:
        # the MMU picks the position itself
        sec.base = align_addr(self.get_guaranteed_free_address(), 8)

    self.sections.append(sec)
    self._update_state()
    return True
def _update_state(self):
"""
Called whenever a section or program is added to keep the list of programs and sections consistent
:return:
"""
self.programs.sort(key=lambda bin: bin.base)
self.sections.sort(key=lambda sec: sec.base)
def get_guaranteed_free_address(self) -> T_AbsoluteAddress:
    """
    Return the address directly behind the last entry of self.sections

    :return: end of the last section, or 0x100 if there are no sections
    """
    if self.sections:
        last = self.sections[-1]
        return last.base + last.size
    return 0x100
def __repr__(self):
return "MMU(\n\t{}\n)".format(
"\n\t".join(repr(x) for x in self.sections)
return "{}(\n\t{}\n)".format(
self.__class__.__name__,
"\n\t".join(repr(x) for x in self.programs)
)
def context_for(self, addr: T_AbsoluteAddress) -> InstructionContext:
    """
    Look up the instruction context that belongs to an address

    :param addr: The address to look up
    :return: The context of the section containing addr, or a new InstructionContext if no
        section contains it
    """
    sec = self.get_sec_containing(addr)
    return InstructionContext() if sec is None else sec.context
def report_addr(self, addr: T_AbsoluteAddress):
    """
    Print which section (and which program, if known) an address belongs to

    Prints the owning program when one with the section's owner name is loaded, followed by
    the section name, the section base and the offset of addr inside the section.

    :param addr: The address to report on
    """
    sec = self.get_sec_containing(addr)
    if not sec:
        print("addr is in no section!")
        return

    # the first loaded program whose name matches the section's owner
    owner = next((prog for prog in self.programs if prog.name == sec.owner), None)
    if owner is not None:
        print("owned by: {}".format(owner))

    # report the position relative to the start of the containing section
    print("{}: 0x{:0x} + 0x{:0x}".format(sec.name, sec.base, addr - sec.base))

View File

@ -1,320 +0,0 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import re
from enum import IntEnum
from typing import List
from .Exceptions import ParseException
# Assembler directives recognised by the tokenizer, each written with its leading dot.
# The tokenizer matches them as prefixes of the remaining input and picks the longest match.
PSEUDO_OPS = [
    '.asciiz',
    '.double',
    '.extern',
    '.global',
    '.align',
    '.float',
    '.kdata',
    '.ktext',
    '.space',
    '.ascii',
    '.byte',
    '.data',
    '.half',
    '.text',
    '.word',
    '.set',
]
# characters that start a comment running until the end of the line
COMMENT_START = ["#", ";"]

# NOTE: the character classes spell out A-Za-z (and A-Fa-f for hex digits) explicitly. A range
# such as A-z also covers the ASCII characters between 'Z' and 'a' ([, \, ], ^ and `), which
# must not be accepted inside labels, identifiers or numbers.

# a label declaration such as "main:" at the start of the input
REG_VALID_SYMBOL_LABEL = re.compile(r'^([A-Za-z_.][A-Za-z_0-9.]*[A-Za-z_0-9]|[A-Za-z_]):')
# whitespace (group 1) up to and including the next newline
REG_WHITESPACE_UNTIL_NEWLINE = re.compile(r'^(\s*)\n')
# any leading whitespace, including line breaks
REG_WHITESPACE = re.compile(r'^\s*')
# everything up to the next whitespace character
REG_NONWHITESPACE = re.compile(r'^[^\s]*')
# everything up to (not including) the next newline
REG_UNTIL_NEWLINE = re.compile(r'^[^\n]*')
# leading spaces and tabs only
REG_WHITESPACE_NO_LINEBREAK = re.compile(r'^[ \t]*')
# a single instruction argument: a (signed) decimal or hex number or an identifier,
# optionally followed by an identifier in parentheses, e.g. "4(sp)"
REG_VALID_ARGUMENT = re.compile(
    r'^([+-]?(0x[0-9A-Fa-f]+|[0-9]+)|[A-Za-z_.][A-Za-z0-9_.]*[A-Za-z_0-9]|[A-Za-z_])'
    r'(\(([A-Za-z_.][A-Za-z_0-9.]*[A-Za-z_0-9]|[A-Za-z_])\))?'
)
# the separator between two arguments: a comma followed by optional spaces/tabs
REG_ARG_SPLIT = re.compile(r'^,[ \t]*')
def split_accepting_quotes(string, at=REG_ARG_SPLIT, quotes=('"', "'")):
    """
    Split a string at every match of a separator pattern that is not inside quotes

    A quoted part is only closed by the same quote character that opened it, so the other
    quote character may appear inside it (e.g. "it's"). Splitting stops at the first comment
    character outside of quotes; the comment is not part of the result.

    :param string: The string to split, None yields an empty list
    :param at: Compiled pattern describing the separator, matched against the remaining input
    :param quotes: The characters that open and close a quoted part
    :return: The list of pieces
    """
    pieces = []
    if string is None:
        return pieces

    pos = 0
    last_piece = 0
    # the quote character that opened the current quoted part, None outside of quotes
    open_quote = None

    while pos < len(string):
        # the remaining input is sliced because the separator pattern is anchored with ^
        match = at.match(string[pos:])
        char = string[pos]
        if match is not None:
            if open_quote is None:
                pieces.append(string[last_piece:pos])
                pos += len(match.group(0))
                last_piece = pos
            else:
                # separators inside quotes belong to the current piece
                pos += len(match.group(0))
        elif open_quote is not None:
            # inside quotes only the matching quote character has a meaning
            if char == open_quote:
                open_quote = None
            pos += 1
        elif char in quotes:
            open_quote = char
            pos += 1
        elif char in COMMENT_START:  # entering comment
            break
        else:
            pos += 1

    if open_quote is not None:
        print("[Tokenizer.split] unbalanced quotes in \"{}\"!".format(string))

    pieces.append(string[last_piece:pos])
    return pieces
class RiscVInput:
    """
    Represents an Assembly file

    Holds the complete source text together with a cursor (pos). The peek methods inspect the
    text at the cursor, the consume methods additionally move the cursor forward.
    """

    def __init__(self, content: str, name: str):
        self.content = content
        self.pos = 0
        self.len = len(content)
        self.name = name

    @staticmethod
    def from_file(src: str):
        """
        Create an input from the file at path src, the path is used as name
        """
        with open(src, 'r') as f:
            return RiscVInput(f.read(), src)

    def _match_regex(self, regex, at: int, regex_group: int, action: str):
        """
        Match regex against the content starting at index at

        :param action: 'peek' or 'consume', only used in the printed warnings
        :return: the text of the requested group, or None if there is no match or the group
            does not start at the beginning of the match
        """
        if not isinstance(regex, re.Pattern):
            print("uncompiled regex passed to {}!".format(action))
            regex = re.compile(regex)
        # the content is sliced because the patterns used here are anchored with ^
        match = regex.match(self.content[at:])
        if match is None:
            return None
        if regex_group != 0 and not match.group(0).startswith(match.group(regex_group)):
            print("Cannot {} regex group that does not start at match start!".format(action))
            return None
        return match.group(regex_group)

    def peek(self, offset: int = 0, size: int = 1, regex: re.Pattern = None, text: str = None, regex_group: int = 0):
        """
        Look at the input at the cursor (plus offset) without moving the cursor

        :param offset: distance from the cursor at which to look
        :param size: number of characters to return if neither regex nor text is given
        :param regex: if given, return the text matched by this pattern or None
        :param text: if given (and no regex), return text if the input continues with it,
            False otherwise
        :param regex_group: the group of the regex match to return
        """
        at = self.pos + offset
        if regex:
            return self._match_regex(regex, at, regex_group, 'peek')
        if text:
            if self.content.startswith(text, at):
                return self.content[at:at + len(text)]
            return False
        return self.content[at:at + size]

    def peek_one_of(self, options: List[str]):
        """
        Find the longest of the given options the input continues with

        :return: the longest matching option, or False if none matches
        """
        longest_peek = 0
        ret = False
        for text in options:
            if len(text) > longest_peek and self.content.startswith(text, self.pos):
                longest_peek = len(text)
                ret = text
        return ret

    def consume(self, size: int = 1, regex: re.Pattern = None, text: str = None, regex_group: int = 0):
        """
        Read from the input at the cursor and move the cursor behind what was read

        :param size: number of characters to consume if neither regex nor text is given
        :param regex: if given, consume the text matched by this pattern; returns None and
            consumes nothing if it does not match
        :param text: if given (and no regex), consume text; returns None and consumes
            nothing if the input does not continue with it
        :param regex_group: the group of the regex match to consume
        :return: the consumed text, or None
        """
        at = self.pos
        if regex:
            matched = self._match_regex(regex, at, regex_group, 'consume')
            if matched is None:
                return None
            self.pos += len(matched)
            return matched
        if text:
            if self.content.startswith(text, at):
                self.pos += len(text)
                return text
            return None
        self.pos += size
        return self.content[at:at + size]

    def consume_one_of(self, options: List[str]):
        """
        Consume the longest of the given options the input continues with

        :return: the consumed option, or False if none matches (nothing is consumed then)
        """
        ret = self.peek_one_of(options)
        # only consume on a match, consume(text=False) would swallow a single character
        if ret:
            self.consume(text=ret)
        return ret

    def seek_newline(self):
        """
        Consume the whitespace in front of the next newline, the newline itself is not consumed

        :return: the consumed whitespace, or None if something other than whitespace precedes
            the next newline
        """
        return self.consume(regex=REG_WHITESPACE_UNTIL_NEWLINE, regex_group=1)

    def consume_whitespace(self, linebreak=True):
        """
        Consume leading whitespace

        :param linebreak: if False only spaces and tabs are consumed
        :return: the consumed whitespace
        """
        if linebreak:
            return self.consume(regex=REG_WHITESPACE)
        return self.consume(regex=REG_WHITESPACE_NO_LINEBREAK)

    def has_next(self):
        """
        :return: True while the cursor has not reached the end of the input
        """
        return self.pos < self.len

    def context(self, size: int = 5):
        """
        returns a context string:
        <local input before pos>|<local input after pos>

        :param size: maximum number of characters shown on each side of the cursor
        """
        start = max(self.pos - size, 0)
        # the slice end is exclusive, so it may reach up to self.len
        end = min(self.pos + size, self.len)
        return self.content[start:self.pos] + '|' + self.content[self.pos:end]
class TokenType(IntEnum):
    """
    The kinds of tokens produced by the tokenizer

    Both str() and repr() of a member yield just the member name.
    """
    SYMBOL = 0
    INSTRUCTION = 1
    PSEUDO_OP = 2

    def __str__(self):
        return self.name

    # repr is intentionally the same as str
    __repr__ = __str__
class RiscVToken:
    """
    Base class of all tokens, stores the type of the token
    """
    type: TokenType

    def __init__(self, t_type: TokenType):
        self.type = t_type

    def text(self):
        """
        create text representation of instruction
        """
        return "unknown"

    def __repr__(self):
        # <class name>[<token type>](<text representation>)
        return f"{self.__class__.__name__}[{self.type}]({self.text()})"
class RiscVInstructionToken(RiscVToken):
    """
    Token for a single instruction together with its arguments
    """

    def __init__(self, name, args):
        super().__init__(TokenType.INSTRUCTION)
        self.args = args
        self.instruction = name

    def text(self):
        """
        The instruction name followed by its comma separated arguments, at most the first
        three arguments are shown
        """
        if len(self.args) == 0:
            return self.instruction
        shown = ", ".join("{}".format(arg) for arg in self.args[:3])
        return "{} {}".format(self.instruction, shown)
class RiscVSymbolToken(RiscVToken):
    """
    Token for a symbol declaration, stores the name of the symbol
    """

    def __init__(self, name):
        self.name = name
        super().__init__(TokenType.SYMBOL)

    def text(self):
        """
        The text of a symbol token is the symbol name
        """
        return self.name
class RiscVPseudoOpToken(RiscVToken):
    """
    Token for a pseudo op, stores its name and its arguments
    """

    def __init__(self, name, args):
        self.name = name
        self.args = args
        super().__init__(TokenType.PSEUDO_OP)

    def text(self):
        """
        The name of the pseudo op followed by the formatted argument collection
        """
        return "{} {}".format(self.name, self.args)
class RiscVTokenizer:
    """
    A tokenizer for the RISC-V syntax of a given CPU

    The tokens are collected in self.tokens by calling tokenize().
    """

    def __init__(self, input_assembly: RiscVInput, instructions: List[str]):
        """
        :param input_assembly: The input to tokenize
        :param instructions: The names of all instructions that are accepted
        """
        self.input = input_assembly
        self.tokens: List[RiscVToken] = []
        self.name = input_assembly.name
        self.instructions = instructions

    def tokenize(self):
        """
        Consume the whole input and append the resulting tokens to self.tokens

        :raises ParseException: if the input continues with something that is neither a
            pseudo op, a symbol declaration, a comment nor a known instruction
        """
        while self.input.has_next():
            # remove leading whitespaces, place cursor at text start
            self.input.consume_whitespace()

            # the input may have consisted of nothing but whitespace
            if not self.input.has_next():
                break

            # check if we have a pseudo op
            if self.input.peek_one_of(PSEUDO_OPS):
                self.parse_pseudo_op()

            # check if we have a symbol (like main:)
            elif self.input.peek(regex=REG_VALID_SYMBOL_LABEL):
                self.parse_symbol()

            # comment
            elif self.input.peek() in COMMENT_START:
                self.parse_comment()

            # must be instruction
            elif self.input.peek_one_of(self.instructions):
                self.parse_instruction()
            else:
                token = self.input.peek(size=5)
                raise ParseException("Unknown token around {} at: {}".format(repr(token), repr(self.input.context())))
            self.input.consume_whitespace()

    def parse_pseudo_op(self):
        """
        Consume a pseudo op and the rest of its line, the token name has no leading dot
        """
        name = self.input.consume_one_of(PSEUDO_OPS)
        self.input.consume_whitespace(linebreak=False)

        arg_str = self.input.consume(regex=REG_UNTIL_NEWLINE)
        args = split_accepting_quotes(arg_str) if arg_str else []

        self.tokens.append(RiscVPseudoOpToken(name[1:], args))

    def parse_symbol(self):
        """
        Consume a symbol declaration, the token name has no trailing colon
        """
        name = self.input.consume(regex=REG_VALID_SYMBOL_LABEL)
        self.tokens.append(RiscVSymbolToken(name[:-1]))
        if not self.input.consume_whitespace():
            print("[Tokenizer] symbol declaration should always be followed by whitespace (at {})!".format(
                self.input.context()))

    def parse_instruction(self):
        """
        Consume an instruction name and up to three comma separated arguments
        """
        ins = self.input.consume_one_of(self.instructions)
        args = []
        self.input.consume_whitespace(linebreak=False)
        while self.input.peek(regex=REG_VALID_ARGUMENT) and len(args) < 3:
            arg = self.input.consume(regex=REG_VALID_ARGUMENT)
            args.append(arg)
            # a further argument only follows after a comma
            if not self.input.peek(text=','):
                break
            self.input.consume(text=',')
            self.input.consume_whitespace(linebreak=False)
        self.tokens.append(RiscVInstructionToken(ins, args))

    def parse_comment(self):
        """
        Consume a comment, no token is created for it
        """
        # just consume the rest
        self.input.consume(regex=REG_UNTIL_NEWLINE)

View File

@ -8,24 +8,21 @@ This package aims at providing an all-round usable RISC-V emulator and debugger
It contains everything needed to run assembly files, so you don't need any custom compilers or toolchains
"""
from .Exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \
from .types.exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \
ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException
from .Tokenizer import RiscVInput, RiscVTokenizer
from .Executable import Executable, LoadedExecutable, LoadedMemorySection
from .ExecutableParser import ExecutableParser
from .instructions import *
from .MMU import MMU
from .Registers import Registers
from .Syscall import SyscallInterface, Syscall
from .CPU import CPU
from .registers import Registers
from .syscall import SyscallInterface, Syscall
from .CPU import CPU, UserModeCPU
from .debug import launch_debug_session
from .Config import RunConfig
from .config import RunConfig
from .parser import tokenize, parse_tokens, AssemblyFileLoader
__author__ = "Anton Lydike <Anton@Lydike.com>"
__copyright__ = "Copyright 2021 Anton Lydike"
__version__ = '1.0.0'
__copyright__ = "Copyright 2022 Anton Lydike"
__version__ = '2.0.0a4'

View File

@ -5,16 +5,25 @@ SPDX-License-Identifier: MIT
This file holds the logic for starting the emulator from the CLI
"""
from riscemu import RiscemuBaseException, __copyright__, __version__
from riscemu.CPU import UserModeCPU
if __name__ == '__main__':
from . import *
from .helpers import *
from .config import RunConfig
from .instructions import InstructionSetDict
from .colors import FMT_BOLD, FMT_MAGENTA
from .parser import AssemblyFileLoader
import argparse
import sys
all_ins_names = list(InstructionSetDict.keys())
if '--version' in sys.argv:
print("riscemu version {}\n{}\n\nAvailable ISA: {}".format(
__version__, __copyright__,
", ".join(InstructionSetDict.keys())
))
sys.exit()
class OptionStringAction(argparse.Action):
def __init__(self, option_strings, dest, keys=None, omit_empty=False, **kwargs):
@ -64,6 +73,12 @@ if __name__ == '__main__':
parser.add_argument('--stack_size', type=int, help='Stack size of loaded programs, defaults to 8MB', nargs='?')
parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count',
default=0)
parser.add_argument('--interactive', help="Launch the interactive debugger instantly instead of loading any "
"programs", action='store_true')
args = parser.parse_args()
# create a RunConfig from the cli args
@ -74,7 +89,8 @@ if __name__ == '__main__':
debug_on_exception=not args.options['fail_on_ex'],
add_accept_imm=args.options['add_accept_imm'],
scall_fs=args.syscall_opts['fs_access'],
scall_input=not args.syscall_opts['disable_input']
scall_input=not args.syscall_opts['disable_input'],
verbosity=args.verbose
)
for k, v in dict(cfg_dict).items():
if v is None:
@ -93,17 +109,21 @@ if __name__ == '__main__':
]
try:
cpu = CPU(cfg, ins_to_load)
loaded_exe = None
for file in args.files:
tk = cpu.get_tokenizer(RiscVInput.from_file(file))
tk.tokenize()
loaded_exe = cpu.load(ExecutableParser(tk).parse())
# run the last loaded executable
cpu.run_loaded(loaded_exe)
except RiscemuBaseException as e:
print("Error while parsing: {}".format(e.message()))
import traceback
cpu = UserModeCPU(ins_to_load, cfg)
opts = AssemblyFileLoader.get_options(sys.argv)
for file in args.files:
loader = AssemblyFileLoader.instantiate(file, opts)
cpu.load_program(loader.parse())
# set up a stack
cpu.setup_stack(cfg.stack_size)
# launch the last loaded program
cpu.launch(cpu.mmu.programs[-1], verbose=cfg.verbosity > 1)
except RiscemuBaseException as e:
print("Error: {}".format(e.message()))
e.print_stacktrace()
traceback.print_exception(type(e), e, e.__traceback__)
sys.exit(1)

214
riscemu/assembler.py Normal file
View File

@ -0,0 +1,214 @@
from enum import Enum, auto
from typing import List
from typing import Optional, Tuple, Union
from .colors import FMT_PARSE, FMT_NONE
from riscemu.types.exceptions import ParseException, ASSERT_LEN
from .helpers import parse_numeric_argument, align_addr, get_section_base_name
from .tokenizer import Token
from .types import Program, T_RelativeAddress, InstructionContext, Instruction, BinaryDataMemorySection, \
InstructionMemorySection, Int32
INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini')
"""
A tuple containing all section names which contain executable code (instead of data)
The first segment of each section name (the first segment of ".text.main" is ".text") is checked
against this list to determine the type of the section.
"""
class MemorySectionType(Enum):
    """
    The kind of content held by a section that is currently being assembled
    """
    # the section content is collected in a bytearray
    Data = auto()
    # the section content is collected in a list of instructions
    Instructions = auto()
class CurrentSection:
    """
    The section that is currently being filled by the assembler

    Data sections collect their content in a bytearray, instruction sections in a list.
    """
    name: str
    data: Union[List[Instruction], bytearray]
    type: MemorySectionType
    base: int

    def __init__(self, name: str, type: MemorySectionType, base: int = 0):
        """
        :param name: The name of the section
        :param type: The type of the section, determines the container used for its content
        :param base: The address at which the section starts
        :raises ParseException: if type is not a known section type
        """
        self.name = name
        self.type = type
        self.base = base

        if type == MemorySectionType.Data:
            self.data = bytearray()
            return
        if type == MemorySectionType.Instructions:
            self.data = list()
            return
        raise ParseException("Unknown section type: {}".format(type))

    def current_address(self) -> T_RelativeAddress:
        """
        The address directly behind the current content of the section

        Each entry of a data section counts as one byte, each entry of any other section
        counts as four bytes.
        """
        entry_size = 1 if self.type == MemorySectionType.Data else 4
        return len(self.data) * entry_size + self.base

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "{}(name={},data={},type={})".format(cls_name, self.name, self.data, self.type.name)
class ParseContext:
    """
    The state of the assembler while a single program is parsed

    Holds the program under construction, its instruction context and the section that is
    currently being filled (None as long as no section was started).
    """
    section: Optional[CurrentSection]
    context: InstructionContext
    program: Program

    def __init__(self, name: str):
        """
        :param name: The name of the program to create
        """
        self.program = Program(name)
        self.context = self.program.context
        self.section = None

    def finalize(self) -> Program:
        """
        Finish the current section (if any) and return the program
        """
        self._finalize_section()
        return self.program

    def _finalize_section(self):
        """
        Convert the current section into a memory section of the program and clear it

        A current section whose type is neither Data nor Instructions is dropped without
        being added to the program.
        """
        current = self.section
        if current is None:
            return

        if current.type == MemorySectionType.Data:
            section_cls = BinaryDataMemorySection
        elif current.type == MemorySectionType.Instructions:
            section_cls = InstructionMemorySection
        else:
            section_cls = None

        if section_cls is not None:
            self.program.add_section(section_cls(
                current.data, current.name, self.context, self.program.name, current.base
            ))
        self.section = None

    def new_section(self, name: str, type: MemorySectionType, alignment: int = 4):
        """
        Finish the current section and start a new one

        The new section starts at the aligned end of the previous section, or at 0 if there
        is no previous section.

        :param name: The name of the new section
        :param type: The type of the new section
        :param alignment: Alignment applied to the end address of the previous section
        """
        base = 0
        if self.section is not None:
            base = align_addr(self.section.current_address(), alignment)
        self._finalize_section()
        self.section = CurrentSection(name, type, base)

    def add_label(self, name: str, value: int, is_global: bool = False, is_relative: bool = False):
        """
        Register a label in the instruction context of the program

        :param name: The name of the label
        :param value: The value of the label
        :param is_global: If True the label is added to the global labels of the program
        :param is_relative: If True the label is added to the relative labels of the program
        """
        self.context.labels[name] = value
        if is_global:
            self.program.global_labels.add(name)
        if is_relative:
            self.program.relative_labels.add(name)

    def __repr__(self):
        return "{}(\n\tsection={},\n\tprogram={}\n)".format(
            self.__class__.__name__, self.section, self.program
        )
def ASSERT_IN_SECTION_TYPE(context: ParseContext, type: MemorySectionType):
    """
    Ensure that the parser is currently inside a section of the given type

    :param context: The parse context to check
    :param type: The section type that is required
    :raises ParseException: if there is no current section or it has a different type
    """
    current = context.section
    if current is None:
        raise ParseException('Error, expected to be in {} section, but no section is present...'.format(type.name))
    if current.type == type:
        return
    raise ParseException(
        'Error, expected to be in {} section, but currently in {}...'.format(type.name, current)
    )
class AssemblerDirectives:
    """
    This class represents a collection of all assembler directives as documented by
    https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md#pseudo-ops

    All class methods prefixed with op_ are directly used as assembler directives.
    """

    @classmethod
    def op_align(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Pad the current data section with zero bytes up to a multiple of the argument
        """
        ASSERT_LEN(args, 1)
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
        # NOTE(review): the argument is used as a byte count here; the linked manual may define
        # .align as a power of two - confirm. An argument of 0 raises ZeroDivisionError below.
        align_to = parse_numeric_argument(args[0])
        current_mod = context.section.current_address() % align_to
        if current_mod == 0:
            return
        context.section.data += bytearray(align_to - current_mod)

    @classmethod
    def op_section(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Start a new section; its base name decides whether it holds instructions or data
        """
        ASSERT_LEN(args, 1)
        if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES:
            context.new_section(args[0], MemorySectionType.Instructions)
        else:
            context.new_section(args[0], MemorySectionType.Data)

    @classmethod
    def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Mark the label named by the argument as global
        """
        ASSERT_LEN(args, 1)
        context.program.global_labels.add(args[0])

    @classmethod
    def op_global(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Alias of op_globl
        """
        cls.op_globl(token, args, context)

    @classmethod
    def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Define a label with the numeric value given as second argument
        """
        ASSERT_LEN(args, 2)
        name = args[0]
        value = parse_numeric_argument(args[1])
        context.context.labels[name] = value

    @classmethod
    def op_space(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Reserve the given number of zeroed bytes in the current data section
        """
        ASSERT_LEN(args, 1)
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
        size = parse_numeric_argument(args[0])
        cls.add_bytes(size, None, context)

    @classmethod
    def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Append the given number of zero bytes to the current data section
        """
        ASSERT_LEN(args, 1)
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
        size = parse_numeric_argument(args[0])
        cls.add_bytes(size, bytearray(size), context)

    @classmethod
    def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext):
        """
        Append content to the current data section

        :param size: The number of bytes used if content is None or an int
        :param content: None for zeroed bytes, an int that is converted to size bytes, or the
            bytes to append as they are
        :param context: The current parse context
        """
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)

        if content is None:
            content = bytearray(size)
        if isinstance(content, int):
            content = Int32(content).to_bytes(size)

        context.section.data += content

    @classmethod
    def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True):
        """
        Append ascii encoded text to the current data section

        :param text: The text to add, the escape sequences \\n and \\t are replaced
        :param context: The current parse context
        :param zero_terminate: If True a single zero byte is appended behind the text
        """
        # replace '\t' and '\n' escape sequences
        text = text.replace('\\n', '\n').replace('\\t', '\t')
        encoded_bytes = bytearray(text.encode('ascii'))
        if zero_terminate:
            encoded_bytes += bytearray(1)
        cls.add_bytes(len(encoded_bytes), encoded_bytes, context)

    @classmethod
    def handle_instruction(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Dispatch an assembler directive to the matching handler

        The directive name is token.value without its first character. Unknown directives are
        reported with a printed message and otherwise ignored.

        :param token: The token holding the directive
        :param args: The arguments of the directive
        :param context: The current parse context
        """
        op = token.value[1:]
        if hasattr(cls, 'op_' + op):
            getattr(cls, 'op_' + op)(token, args, context)
        elif op in ('text', 'data', 'rodata', 'bss', 'sbss'):
            # these directives are shorthands for ".section <directive>"
            cls.op_section(token, (token.value,), context)
        elif op in ('string', 'asciiz', 'asciz', 'ascii'):
            ASSERT_LEN(args, 1)
            # .ascii is the only string directive that emits no terminating zero byte
            cls.add_text(args[0], context, op != 'ascii')
        elif op in DATA_OP_SIZES:
            size = DATA_OP_SIZES[op]
            for arg in args:
                cls.add_bytes(size, parse_numeric_argument(arg), context)
        else:
            print(FMT_PARSE + "Unknown assembler directive: {} {} in {}".format(token, args, context) + FMT_NONE)
# Maps the names of the data directives (without the leading dot) to the number of bytes
# that is written for each of their arguments.
DATA_OP_SIZES = {
    'byte': 1,
    '2byte': 2, 'half': 2, 'short': 2,
    '4byte': 4, 'word': 4, 'long': 4,
    '8byte': 8, 'dword': 8, 'quad': 8,
}

View File

@ -1,16 +1,15 @@
"""
RiscEmu (c) 2021 Anton Lydike
RiscEmu (c) 2021-2022 Anton Lydike
SPDX-License-Identifier: MIT
"""
from dataclasses import dataclass
from typing import Optional
@dataclass(frozen=True, init=True)
class RunConfig:
stack_size: int = 8 * 1024 * 64 # for 8KB stack
stack_size: int = 8 * 1024 * 64 # for 8KB stack
include_scall_symbols: bool = True
add_accept_imm: bool = False
# debugging
@ -20,4 +19,7 @@ class RunConfig:
scall_input: bool = True
scall_fs: bool = False
verbosity: int = 0
slowdown: float = 1
CONFIG = RunConfig()

View File

@ -3,36 +3,37 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import os.path
import typing
from .Registers import Registers
from .colors import FMT_DEBUG, FMT_NONE
from .Executable import LoadedInstruction
from .types import SimpleInstruction
from .helpers import *
if typing.TYPE_CHECKING:
from . import *
from riscemu import CPU, Registers
HIST_FILE = os.path.join(os.path.expanduser('~'), '.riscemu_history')
def launch_debug_session(cpu: 'CPU', mmu: 'MMU', reg: 'Registers', prompt=""):
if not cpu.conf.debug_instruction or cpu.active_debug:
def launch_debug_session(cpu: 'CPU', prompt=""):
if cpu.debugger_active:
return
import code
import readline
import rlcompleter
cpu.active_debug = True
# set the active debug flag
cpu.debugger_active = True
# setup some aliases:
registers = reg
regs = reg
memory = mmu
mem = mmu
syscall_interface = cpu.syscall_int
registers = cpu.regs
regs = cpu.regs
memory = cpu.mmu
mem = cpu.mmu
mmu = cpu.mmu
# setup helper functions:
def dump(what, *args, **kwargs):
if isinstance(what, Registers):
if what == regs:
regs.dump(*args, **kwargs)
else:
mmu.dump(what, *args, **kwargs)
@ -48,22 +49,47 @@ def launch_debug_session(cpu: 'CPU', mmu: 'MMU', reg: 'Registers', prompt=""):
if len(args) > 3:
print("Invalid arg count!")
return
bin = mmu.get_bin_containing(cpu.pc)
context = mmu.context_for(cpu.pc)
ins = LoadedInstruction(name, list(args), bin)
print(FMT_DEBUG + "Running instruction " + ins + FMT_NONE)
ins = SimpleInstruction(
name,
tuple(args),
context,
cpu.pc)
print(FMT_DEBUG + "Running instruction {}".format(ins) + FMT_NONE)
cpu.run_instruction(ins)
def cont(verbose=False):
cpu.continue_from_debugger(verbose)
try:
cpu.run(verbose)
except LaunchDebuggerException:
print(FMT_DEBUG + 'Returning to debugger...')
return
def step():
cpu.step()
try:
cpu.step()
except LaunchDebuggerException:
return
# collect all variables
sess_vars = globals()
sess_vars.update(locals())
# add tab completion
readline.set_completer(rlcompleter.Completer(sess_vars).complete)
readline.parse_and_bind("tab: complete")
code.InteractiveConsole(sess_vars).interact(banner=FMT_DEBUG + prompt + FMT_NONE, exitmsg="Exiting debugger")
cpu.active_debug = False
if os.path.exists(HIST_FILE):
readline.read_history_file(HIST_FILE)
relaunch_debugger = False
try:
code.InteractiveConsole(sess_vars).interact(
banner=FMT_DEBUG + prompt + FMT_NONE,
exitmsg="Exiting debugger",
)
finally:
cpu.debugger_active = False
readline.write_history_file(HIST_FILE)

View File

@ -20,11 +20,11 @@ def format_ins(ins: int, name: str, fmt: str = 'int'):
return f"{name} <unknown op>"
decoder = INSTRUCTION_ARGS_DECODER[opcode]
if name in ('ecall', 'ebreak'):
if name in ('ecall', 'ebreak', 'mret', 'sret', 'uret'):
return name
if opcode in (0x8, 0x0):
r1, r2, imm = decoder(ins)
return f"{name:<7} {r1}, {imm}({r2})"
return f"{name:<7} {RISCV_REGS[r1]}, {imm}({RISCV_REGS[r2]})"
elif decoder in (decode_i, decode_i_unsigned, decode_b, decode_i_shamt, decode_s):
r1, r2, imm = decoder(ins)
r1, r2 = RISCV_REGS[r1], RISCV_REGS[r2]

View File

@ -5,7 +5,10 @@ SPDX-License-Identifier: MIT
"""
from math import log10, ceil
from .Exceptions import *
from typing import Iterable, Iterator, TypeVar, Generic, List, Optional
from .types.exceptions import *
from .types import Int32, UInt32
def align_addr(addr: int, to_bytes: int = 8) -> int:
@ -27,39 +30,6 @@ def parse_numeric_argument(arg: str) -> int:
raise ParseException('Invalid immediate argument \"{}\", maybe missing symbol?'.format(arg), (arg, ex))
def int_to_bytes(val, bytes=4, unsigned=False) -> bytearray:
    """
    Convert an int into its little endian two's complement byte representation

    :param val: The value to convert
    :param bytes: The number of bytes to produce
    :param unsigned: If True negative values are rejected
    :return: A bytearray of the requested length
    :raises NumberFormatException: if unsigned is True and val is negative
    """
    if unsigned and val < 0:
        raise NumberFormatException("unsigned negative number!")
    as_unsigned = to_unsigned(val, bytes)
    return bytearray(as_unsigned.to_bytes(bytes, 'little'))
def int_from_bytes(bytes, unsigned=False) -> int:
    """
    Convert little endian bytes into an int

    :param bytes: The bytes to convert
    :param unsigned: If True the bytes are read as an unsigned number, otherwise as two's
        complement using the full width of the given bytes
    :return: The resulting int, 0 for empty input
    """
    num = int.from_bytes(bytes, 'little')
    if unsigned:
        return num
    # the sign bit is the highest bit of the given bytes, however many there are
    width = len(bytes) * 8
    if width and num >> (width - 1):
        return num - (1 << width)
    return num
def to_unsigned(num: int, bytes=4) -> int:
    """
    Map a negative number onto its unsigned two's complement value

    :param num: The number to convert, non-negative numbers are returned unchanged
    :param bytes: The width in bytes of the two's complement representation
    """
    return (2 ** (bytes * 8)) + num if num < 0 else num
def to_signed(num: int, bytes=4) -> int:
    """
    Interpret a number as a two's complement value of the given width

    :param num: The number to convert
    :param bytes: The width in bytes of the two's complement representation
    :return: num minus 2 ** (8 * bytes) if shifting num right by (bytes * 8 - 1) bits leaves a
        non-zero value, num otherwise
    """
    above_sign_bit = num >> (bytes * 8 - 1)
    if not above_sign_bit:
        return num
    return num - 2 ** (8 * bytes)
def create_chunks(my_list, chunk_size):
    """Split a list like [a,b,c,d,e,f,g,h,i,j,k,l,m] into e.g. [[a,b,c,d],[e,f,g,h],[i,j,k,l],[m]]"""
    # every chunk starts at a multiple of chunk_size, the last chunk may be shorter
    chunk_starts = range(0, len(my_list), chunk_size)
    return [my_list[start:start + chunk_size] for start in chunk_starts]
@ -85,10 +55,10 @@ def format_bytes(byte_arr: bytearray, fmt: str, group: int = 1, highlight: int =
return highlight_in_list(['0x{}'.format(ch.hex()) for ch in chunks], highlight)
if fmt == 'int':
spc = str(ceil(log10(2 ** (group * 8 - 1))) + 1)
return highlight_in_list([('{:0' + spc + 'd}').format(int_from_bytes(ch)) for ch in chunks], highlight)
return highlight_in_list([('{:0' + spc + 'd}').format(Int32(ch)) for ch in chunks], highlight)
if fmt == 'uint':
spc = str(ceil(log10(2 ** (group * 8))))
return highlight_in_list([('{:0' + spc + 'd}').format(int_from_bytes(ch, unsigned=True)) for ch in chunks],
return highlight_in_list([('{:0' + spc + 'd}').format(UInt32(ch)) for ch in chunks],
highlight)
if fmt == 'ascii':
return "".join(repr(chr(b))[1:-1] for b in byte_arr)
@ -105,3 +75,42 @@ def bind_twos_complement(val):
elif val > 2147483647:
return val - 4294967296
return val
T = TypeVar('T')


class Peekable(Generic[T], Iterator[T]):
    """
    An iterator that allows looking at the next item without consuming it and allows
    pushing items back to the front

    Items are always handed out from the front of the cache first, so peek() and next()
    agree on what the next item is.
    """

    def __init__(self, iterable: Iterable[T]):
        self.iterable = iter(iterable)
        # items that were already taken from the iterable (or pushed back), next item first
        self.cache: List[T] = list()

    def __iter__(self) -> Iterator[T]:
        return self

    def __next__(self) -> T:
        if self.cache:
            # take from the front, this is the item peek() reports
            return self.cache.pop(0)
        return next(self.iterable)

    def _fill_cache(self) -> bool:
        """
        Make sure the cache holds the next item, returns False if there are no more items
        """
        if self.cache:
            return True
        try:
            self.cache.append(next(self.iterable))
        except StopIteration:
            return False
        return True

    def peek(self) -> Optional[T]:
        """
        Return the next item without consuming it, None if there are no more items
        """
        if not self._fill_cache():
            return None
        return self.cache[0]

    def push_back(self, item: T):
        """
        Put an item in front of all remaining items, it is the next one to be returned
        """
        self.cache.insert(0, item)

    def is_empty(self) -> bool:
        """
        Check whether there are no more items, items that are None count as items
        """
        return not self._fill_cache()
def get_section_base_name(section_name: str) -> str:
    """
    Reduce a (sub)section name to the name of its base section, e.g. ``.text.startup`` becomes ``.text``.

    Names that do not start with a dot are reported as invalid on stdout; their first
    component is then used as the base name (``text.startup`` becomes ``.text``).

    :param section_name: the section name as written in the source
    :return: the base section name, always starting with a dot
    """
    if not section_name.startswith('.'):
        print(FMT_PARSE + f"Invalid section {section_name}, not starting with a dot!" + FMT_NONE)
        # without a leading dot the base name is the first component, not the second
        return '.' + section_name.split('.')[0]
    # a leading dot makes the first component empty, the base name is the second one
    return '.' + section_name.split('.')[1]

View File

@ -1,6 +1,6 @@
from .InstructionSet import InstructionSet, LoadedInstruction
from ..Exceptions import INS_NOT_IMPLEMENTED
from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed
from .instruction_set import InstructionSet, Instruction
from riscemu.types.exceptions import INS_NOT_IMPLEMENTED
from ..types import Int32, UInt32
class RV32A(InstructionSet):
@ -10,69 +10,69 @@ class RV32A(InstructionSet):
for this?
"""
def instruction_lr_w(self, ins: 'LoadedInstruction'):
def instruction_lr_w(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_sc_w(self, ins: 'LoadedInstruction'):
def instruction_sc_w(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_amoswap_w(self, ins: 'LoadedInstruction'):
def instruction_amoswap_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
if dest == 'zero':
self.mmu.write(addr, int_to_bytes(addr, 4))
self.mmu.write(addr, val.to_bytes())
else:
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(val, 4))
old = Int32(self.mmu.read(addr, 4))
self.mmu.write(addr, val.to_bytes())
self.regs.set(dest, old)
def instruction_amoadd_w(self, ins: 'LoadedInstruction'):
def instruction_amoadd_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old + val, 4))
old = Int32(self.mmu.read(addr, 4))
self.mmu.write(addr, (old + val).to_bytes(4))
self.regs.set(dest, old)
def instruction_amoand_w(self, ins: 'LoadedInstruction'):
def instruction_amoand_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old & val, 4))
old = Int32(self.mmu.read(addr, 4))
self.mmu.write(addr, (old & val).to_bytes(4))
self.regs.set(dest, old)
def instruction_amoor_w(self, ins: 'LoadedInstruction'):
def instruction_amoor_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old | val, 4))
old = Int32(self.mmu.read(addr, 4))
self.mmu.write(addr, (old | val).to_bytes(4))
self.regs.set(dest, old)
def instruction_amoxor_w(self, ins: 'LoadedInstruction'):
def instruction_amoxor_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old ^ val, 4))
old = Int32(self.mmu.read(addr, 4))
self.mmu.write(addr, (old ^ val).to_bytes(4))
self.regs.set(dest, old)
def instruction_amomax_w(self, ins: 'LoadedInstruction'):
def instruction_amomax_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(max(old, val), 4))
old = Int32(self.mmu.read(addr, 4))
self.mmu.write(addr, max(old, val).to_bytes(4))
self.regs.set(dest, old)
def instruction_amomaxu_w(self, ins: 'LoadedInstruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
val = to_unsigned(val)
old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True)
def instruction_amomaxu_w(self, ins: 'Instruction'):
val: UInt32
dest, addr, val = self.parse_rd_rs_rs(ins, signed=False)
old = UInt32(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(to_signed(max(old, val)), 4))
self.mmu.write(addr, max(old, val).to_bytes())
self.regs.set(dest, old)
def instruction_amomin_w(self, ins: 'LoadedInstruction'):
def instruction_amomin_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(min(old, val), 4))
old = Int32(self.mmu.read(addr, 4))
self.mmu.write(addr, min(old, val).to_bytes(4))
self.regs.set(dest, old)
def instruction_amominu_w(self, ins: 'LoadedInstruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
val = to_unsigned(val)
old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True)
def instruction_amominu_w(self, ins: 'Instruction'):
val: UInt32
dest, addr, val = self.parse_rd_rs_rs(ins, signed=False)
old = UInt32(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(to_signed(min(old, val)), 4))
self.mmu.write(addr, min(old, val).to_bytes(4))
self.regs.set(dest, old)

View File

@ -4,14 +4,13 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
from .InstructionSet import *
from .instruction_set import *
from ..CPU import UserModeCPU
from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed
from ..colors import FMT_DEBUG, FMT_NONE
from ..debug import launch_debug_session
from ..Exceptions import LaunchDebuggerException
from ..Syscall import Syscall
from ..Executable import LoadedInstruction
from riscemu.types.exceptions import LaunchDebuggerException
from ..syscall import Syscall
from ..types import Instruction, Int32, UInt32
class RV32I(InstructionSet):
@ -23,79 +22,79 @@ class RV32I(InstructionSet):
See https://maxvytech.com/images/RV32I-11-2018.pdf for a more detailed overview
"""
def instruction_lb(self, ins: 'LoadedInstruction'):
def instruction_lb(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1)))
self.regs.set(rd, Int32.sign_extend(self.mmu.read(addr.unsigned_value, 1), 8))
def instruction_lh(self, ins: 'LoadedInstruction'):
def instruction_lh(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2)))
self.regs.set(rd, Int32.sign_extend(self.mmu.read(addr.unsigned_value, 2), 16))
def instruction_lw(self, ins: 'LoadedInstruction'):
def instruction_lw(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 4)))
self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 4)))
def instruction_lbu(self, ins: 'LoadedInstruction'):
def instruction_lbu(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1), unsigned=True))
self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 1)))
def instruction_lhu(self, ins: 'LoadedInstruction'):
def instruction_lhu(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2), unsigned=True))
self.regs.set(rd, Int32(self.mmu.read(addr.unsigned_value, 2)))
def instruction_sb(self, ins: 'LoadedInstruction'):
def instruction_sb(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.mmu.write(addr, 1, int_to_bytes(self.regs.get(rd), 1))
self.mmu.write(addr.unsigned_value, 1, self.regs.get(rd).to_bytes(1))
def instruction_sh(self, ins: 'LoadedInstruction'):
def instruction_sh(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.mmu.write(addr, 2, int_to_bytes(self.regs.get(rd), 2))
self.mmu.write(addr.unsigned_value, 2, self.regs.get(rd).to_bytes(2))
def instruction_sw(self, ins: 'LoadedInstruction'):
def instruction_sw(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.mmu.write(addr, 4, int_to_bytes(self.regs.get(rd), 4))
self.mmu.write(addr.unsigned_value, 4, self.regs.get(rd).to_bytes(4))
def instruction_sll(self, ins: 'LoadedInstruction'):
def instruction_sll(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
src2 = ins.get_reg(2)
self.regs.set(
dst,
to_signed(to_unsigned(self.regs.get(src1)) << (self.regs.get(src2) & 0b11111))
self.regs.get(src1) << (self.regs.get(src2) & 0b11111)
)
def instruction_slli(self, ins: 'LoadedInstruction'):
def instruction_slli(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
imm = ins.get_imm(2)
self.regs.set(
dst,
to_signed(to_unsigned(self.regs.get(src1)) << (imm & 0b11111))
self.regs.get(src1) << (imm & 0b11111)
)
def instruction_srl(self, ins: 'LoadedInstruction'):
def instruction_srl(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
src2 = ins.get_reg(2)
self.regs.set(
dst,
to_signed(to_unsigned(self.regs.get(src1)) >> (self.regs.get(src2) & 0b11111))
self.regs.get(src1).shift_right_logical(self.regs.get(src2) & 0b11111)
)
def instruction_srli(self, ins: 'LoadedInstruction'):
def instruction_srli(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
imm = ins.get_imm(2)
self.regs.set(
dst,
to_signed(to_unsigned(self.regs.get(src1)) >> (imm & 0b11111))
self.regs.get(src1).shift_right_logical(imm & 0b11111)
)
def instruction_sra(self, ins: 'LoadedInstruction'):
def instruction_sra(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
@ -105,7 +104,7 @@ class RV32I(InstructionSet):
self.regs.get(src1) >> (self.regs.get(src2) & 0b11111)
)
def instruction_srai(self, ins: 'LoadedInstruction'):
def instruction_srai(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
@ -115,154 +114,148 @@ class RV32I(InstructionSet):
self.regs.get(src1) >> (imm & 0b11111)
)
def instruction_add(self, ins: 'LoadedInstruction'):
dst = ""
if self.cpu.conf.add_accept_imm:
try:
dst, rs1, rs2 = self.parse_rd_rs_imm(ins)
except:
pass
if not dst:
dst, rs1, rs2 = self.parse_rd_rs_rs(ins)
def instruction_add(self, ins: 'Instruction'):
# FIXME: once configuration is figured out, add flag to support immediate arg in add instruction
dst, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
dst,
rs1 + rs2
)
def instruction_addi(self, ins: 'LoadedInstruction'):
def instruction_addi(self, ins: 'Instruction'):
dst, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set(
dst,
rs1 + imm
)
def instruction_sub(self, ins: 'LoadedInstruction'):
def instruction_sub(self, ins: 'Instruction'):
dst, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
dst,
rs1 - rs2
)
def instruction_lui(self, ins: 'LoadedInstruction'):
def instruction_lui(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
imm = ins.get_imm(1)
self.regs.set(reg, imm << 12)
imm = UInt32(ins.get_imm(1) << 12)
self.regs.set(reg, Int32(imm))
def instruction_auipc(self, ins: 'LoadedInstruction'):
def instruction_auipc(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
imm = to_unsigned(ins.get_imm(1))
self.regs.set(reg, self.pc + (imm << 12))
imm = UInt32(ins.get_imm(1) << 12)
self.regs.set(reg, imm.signed() + self.pc)
def instruction_xor(self, ins: 'LoadedInstruction'):
def instruction_xor(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 ^ rs2
)
def instruction_xori(self, ins: 'LoadedInstruction'):
def instruction_xori(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set(
rd,
rs1 ^ imm
)
def instruction_or(self, ins: 'LoadedInstruction'):
def instruction_or(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 | rs2
)
def instruction_ori(self, ins: 'LoadedInstruction'):
def instruction_ori(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set(
rd,
rs1 | imm
)
def instruction_and(self, ins: 'LoadedInstruction'):
def instruction_and(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 & rs2
)
def instruction_andi(self, ins: 'LoadedInstruction'):
def instruction_andi(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set(
rd,
rs1 & imm
)
def instruction_slt(self, ins: 'LoadedInstruction'):
def instruction_slt(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
int(rs1 < rs2)
Int32(int(rs1 < rs2))
)
def instruction_slti(self, ins: 'LoadedInstruction'):
def instruction_slti(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set(
rd,
int(rs1 < imm)
Int32(int(rs1 < imm))
)
def instruction_sltu(self, ins: 'LoadedInstruction'):
def instruction_sltu(self, ins: 'Instruction'):
dst, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
self.regs.set(
dst,
int(rs1 < rs2)
Int32(int(rs1 < rs2))
)
def instruction_sltiu(self, ins: 'LoadedInstruction'):
def instruction_sltiu(self, ins: 'Instruction'):
dst, rs1, imm = self.parse_rd_rs_imm(ins, signed=False)
self.regs.set(
dst,
int(rs1 < imm)
Int32(int(rs1 < imm))
)
def instruction_beq(self, ins: 'LoadedInstruction'):
def instruction_beq(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 == rs2:
self.pc = dst
self.pc = dst.unsigned_value
def instruction_bne(self, ins: 'LoadedInstruction'):
def instruction_bne(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 != rs2:
self.pc = dst
self.pc = dst.unsigned_value
def instruction_blt(self, ins: 'LoadedInstruction'):
def instruction_blt(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 < rs2:
self.pc = dst
self.pc = dst.unsigned_value
def instruction_bge(self, ins: 'LoadedInstruction'):
def instruction_bge(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 >= rs2:
self.pc = dst
self.pc = dst.unsigned_value
def instruction_bltu(self, ins: 'LoadedInstruction'):
def instruction_bltu(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
if rs1 < rs2:
self.pc = dst
self.pc = dst.unsigned_value
def instruction_bgeu(self, ins: 'LoadedInstruction'):
def instruction_bgeu(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
if rs1 >= rs2:
self.pc = dst
self.pc = dst.unsigned_value
# technically deprecated
def instruction_j(self, ins: 'LoadedInstruction'):
def instruction_j(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 1)
addr = ins.get_imm(0)
self.pc = addr
def instruction_jal(self, ins: 'LoadedInstruction'):
def instruction_jal(self, ins: 'Instruction'):
reg = 'ra' # default register is ra
if len(ins.args) == 1:
addr = ins.get_imm(0)
@ -270,60 +263,59 @@ class RV32I(InstructionSet):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
addr = ins.get_imm(1)
self.regs.set(reg, self.pc)
self.regs.set(reg, Int32(self.pc))
self.pc = addr
def instruction_jalr(self, ins: 'LoadedInstruction'):
def instruction_jalr(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
addr = ins.get_imm(1)
self.regs.set(reg, self.pc)
self.regs.set(reg, Int32(self.pc))
self.pc = addr
def instruction_ret(self, ins: 'LoadedInstruction'):
def instruction_ret(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0)
self.pc = self.regs.get('ra')
self.pc = self.regs.get('ra').value
def instruction_ecall(self, ins: 'LoadedInstruction'):
def instruction_ecall(self, ins: 'Instruction'):
self.instruction_scall(ins)
def instruction_ebreak(self, ins: 'LoadedInstruction'):
def instruction_ebreak(self, ins: 'Instruction'):
self.instruction_sbreak(ins)
def instruction_scall(self, ins: 'LoadedInstruction'):
def instruction_scall(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0)
if not isinstance(self.cpu, UserModeCPU):
# FIXME: add exception for syscall not supported or something
raise
syscall = Syscall(self.regs.get('a7'), self.cpu)
self.cpu.syscall_int.handle_syscall(syscall)
def instruction_sbreak(self, ins: 'LoadedInstruction'):
def instruction_sbreak(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0)
if self.cpu.active_debug:
print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE)
raise LaunchDebuggerException()
launch_debug_session(
self.cpu,
self.mmu,
self.regs,
"Debug instruction encountered at 0x{:08X}".format(self.pc - 1)
)
def instruction_nop(self, ins: 'LoadedInstruction'):
print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE)
raise LaunchDebuggerException()
def instruction_nop(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0)
pass
def instruction_li(self, ins: 'LoadedInstruction'):
def instruction_li(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
immediate = ins.get_imm(1)
self.regs.set(reg, immediate)
self.regs.set(reg, Int32(immediate))
def instruction_la(self, ins: 'LoadedInstruction'):
def instruction_la(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
immediate = ins.get_imm(1)
self.regs.set(reg, immediate)
self.regs.set(reg, Int32(immediate))
def instruction_mv(self, ins: 'LoadedInstruction'):
def instruction_mv(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
rd, rs = ins.get_reg(0), ins.get_reg(1)
self.regs.set(rd, self.regs.get(rs))

View File

@ -4,56 +4,56 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
from .InstructionSet import *
from ..Exceptions import INS_NOT_IMPLEMENTED
from .instruction_set import *
from riscemu.types.exceptions import INS_NOT_IMPLEMENTED
class RV32M(InstructionSet):
"""
The RV32M Instruction set, containing multiplication and division instructions
"""
def instruction_mul(self, ins: 'LoadedInstruction'):
def instruction_mul(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 * rs2
)
def instruction_mulh(self, ins: 'LoadedInstruction'):
def instruction_mulh(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
(rs1 * rs2) >> 32
)
def instruction_mulhsu(self, ins: 'LoadedInstruction'):
def instruction_mulhsu(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_mulhu(self, ins: 'LoadedInstruction'):
def instruction_mulhu(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_div(self, ins: 'LoadedInstruction'):
def instruction_div(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 // rs2
)
def instruction_divu(self, ins: 'LoadedInstruction'):
def instruction_divu(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
self.regs.set(
rd,
rs1 // rs2
)
def instruction_rem(self, ins: 'LoadedInstruction'):
def instruction_rem(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 % rs2
)
def instruction_remu(self, ins: 'LoadedInstruction'):
def instruction_remu(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
self.regs.set(
rd,

View File

@ -6,7 +6,7 @@ SPDX-License-Identifier: MIT
This package holds all instruction sets, available to the processor
"""
from .InstructionSet import InstructionSet
from .instruction_set import InstructionSet
from .RV32M import RV32M
from .RV32I import RV32I
from .RV32A import RV32A

View File

@ -8,8 +8,8 @@ from typing import Tuple, Callable, Dict
from abc import ABC
from ..CPU import CPU
from ..helpers import ASSERT_LEN, ASSERT_IN, to_unsigned
from ..Executable import LoadedInstruction
from riscemu.types.exceptions import ASSERT_LEN, ASSERT_IN
from ..types import Instruction, Int32, UInt32
class InstructionSet(ABC):
@ -30,7 +30,7 @@ class InstructionSet(ABC):
self.name = self.__class__.__name__
self.cpu = cpu
def load(self) -> Dict[str, Callable[['LoadedInstruction'], None]]:
def load(self) -> Dict[str, Callable[['Instruction'], None]]:
"""
This is called by the CPU once it instantiates this instruction set
@ -51,7 +51,7 @@ class InstructionSet(ABC):
if member.startswith('instruction_'):
yield member[12:].replace('_', '.'), getattr(self, member)
def parse_mem_ins(self, ins: 'LoadedInstruction') -> Tuple[str, int]:
def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, Int32]:
"""
parses both rd, rs, imm and rd, imm(rs) argument format and returns (rd, imm+rs1)
(so a register and address tuple for memory instructions)
@ -69,7 +69,7 @@ class InstructionSet(ABC):
rd = ins.get_reg(0)
return rd, rs + imm
def parse_rd_rs_rs(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]:
def parse_rd_rs_rs(self, ins: 'Instruction', signed=True) -> Tuple[str, Int32, Int32]:
"""
Assumes the command is in <name> rd, rs1, rs2 format
Returns the name of rd, and the values in rs1 and rs2
@ -81,10 +81,10 @@ class InstructionSet(ABC):
self.get_reg_content(ins, 2)
else:
return ins.get_reg(0), \
to_unsigned(self.get_reg_content(ins, 1)), \
to_unsigned(self.get_reg_content(ins, 2))
UInt32(self.get_reg_content(ins, 1)), \
UInt32(self.get_reg_content(ins, 2))
def parse_rd_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]:
def parse_rd_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[str, Int32, Int32]:
"""
Assumes the command is in <name> rd, rs, imm format
Returns the name of rd, the value in rs and the immediate imm
@ -92,28 +92,28 @@ class InstructionSet(ABC):
ASSERT_LEN(ins.args, 3)
if signed:
return ins.get_reg(0), \
self.get_reg_content(ins, 1), \
ins.get_imm(2)
Int32(self.get_reg_content(ins, 1)), \
Int32(ins.get_imm(2))
else:
return ins.get_reg(0), \
to_unsigned(self.get_reg_content(ins, 1)), \
to_unsigned(ins.get_imm(2))
UInt32(self.get_reg_content(ins, 1)), \
UInt32(ins.get_imm(2))
def parse_rs_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[int, int, int]:
def parse_rs_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[Int32, Int32, Int32]:
"""
Assumes the command is in <name> rs1, rs2, imm format
Returns the values in rs1, rs2 and the immediate imm
"""
if signed:
return self.get_reg_content(ins, 0), \
self.get_reg_content(ins, 1), \
ins.get_imm(2)
return Int32(self.get_reg_content(ins, 0)), \
Int32(self.get_reg_content(ins, 1)), \
Int32(ins.get_imm(2))
else:
return to_unsigned(self.get_reg_content(ins, 0)), \
to_unsigned(self.get_reg_content(ins, 1)), \
to_unsigned(ins.get_imm(2))
return UInt32(self.get_reg_content(ins, 0)), \
UInt32(self.get_reg_content(ins, 1)), \
UInt32(ins.get_imm(2))
def get_reg_content(self, ins: 'LoadedInstruction', ind: int) -> int:
def get_reg_content(self, ins: 'Instruction', ind: int) -> Int32:
"""
get the register name from ins and then return the register contents
"""

24
riscemu/interactive.py Normal file
View File

@ -0,0 +1,24 @@
from riscemu import RunConfig
from riscemu.types import InstructionMemorySection, SimpleInstruction, Program
# Entry point: build a tiny program by hand and run it on a user mode CPU.
if __name__ == '__main__':
    from .CPU import UserModeCPU
    from .instructions import InstructionSetDict
    from .debug import launch_debug_session

    instruction_sets = list(InstructionSetDict.values())
    cpu = UserModeCPU(instruction_sets, RunConfig(verbosity=4))

    program = Program('interactive session', base=0x100)
    context = program.context

    # The program starts with an ebreak, followed by a syscall with a0 = 0 and a7 = 93
    # (presumably the exit syscall - verify against the syscall table).
    instructions = [
        SimpleInstruction('ebreak', (), context, 0x100),
        SimpleInstruction('addi', ('a0', 'zero', '0'), context, 0x104),
        SimpleInstruction('addi', ('a7', 'zero', '93'), context, 0x108),
        SimpleInstruction('scall', (), context, 0x10C),
    ]
    text_section = InstructionMemorySection(instructions, '.text', context, program.name, 0x100)
    program.add_section(text_section)

    cpu.load_program(program)
    cpu.setup_stack()
    cpu.launch(program)

127
riscemu/parser.py Normal file
View File

@ -0,0 +1,127 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import re
from typing import Dict, Tuple, Iterable, Callable, List
from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
from .colors import FMT_PARSE
from .helpers import Peekable
from .tokenizer import Token, TokenType, tokenize
from .types import Program, T_ParserOpts, ProgramLoader, SimpleInstruction
from .types.exceptions import ParseException
def parse_instruction(token: Token, args: Tuple[str], context: ParseContext):
    """
    Append the instruction described by token and args to the current section.

    :param token: the instruction name token
    :param args: the argument values following the instruction name
    :param context: the current parse context
    :raises ParseException: if there is no current section or it does not hold instructions
    """
    section = context.section
    if section is None or section.type != MemorySectionType.Instructions:
        raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context))

    # the instruction is placed at the current end of the section
    address = section.current_address()
    ins = SimpleInstruction(token.value, args, context.context, address)
    section.data.append(ins)
def parse_label(token: Token, args: Tuple[str], context: ParseContext):
    """
    Register the label described by token at the current address of the current section.

    Purely numeric labels are recorded as numbered labels, which may be defined
    multiple times. All other labels are added as symbols; redefining one only
    prints a warning.

    :param token: the label token, its value ends with a colon
    :param args: unused, labels take no arguments
    :param context: the current parse context
    :raises ParseException: if the label appears before any section was opened
    """
    if context.section is None:
        # same error reporting as parse_instruction, instead of failing with an
        # AttributeError on the missing section
        raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context))

    # strip the trailing colon
    name = token.value[:-1]
    address = context.section.current_address()

    if re.match(r'^\d+$', name):
        # relative label:
        context.context.numbered_labels[name].append(address)
        return

    if name in context.context.labels:
        print(FMT_PARSE + 'Warn: Symbol {} defined twice!'.format(name))
    context.add_label(name, address, is_relative=True)
# Dispatch table used by parse_tokens: maps the type of the token that starts a statement
# to the handler processing it. Every handler receives the leading token, the argument
# values that followed it and the parse context.
PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {
    TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction,
    TokenType.LABEL: parse_label,
    TokenType.INSTRUCTION_NAME: parse_instruction
}
def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
    """
    Turn a stream of tokens into a parsed program.

    The tokens are grouped into statements (leading token plus arguments) and each
    statement is handed to the handler registered in PARSERS for its token type.

    :param name: the programs name
    :param tokens_iter: the programs content, tokenized
    :return: a parsed program
    :raises ParseException: if a statement starts with a token type that has no handler
    """
    context = ParseContext(name)
    statements = composite_tokenizer(Peekable[Token](tokens_iter))

    for token, args in statements:
        if token.type not in PARSERS:
            raise ParseException("Unexpected token type: {}, {}".format(token, args))
        handler = PARSERS[token.type]
        handler(token, args, context)

    return context.finalize()
def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]:
    """
    Group a flat token stream into statements.

    Yields tuples (token, arguments): the token is either a pseudo op, a label or an
    instruction name, the arguments are the values of the argument tokens following it
    on the same line. Tokens of any other type found at the start of a statement are skipped.

    :param tokens_iter: An iterator over tokens
    :return: An iterator over a slightly more structured representation of the tokens
    """
    tokens = Peekable[Token](tokens_iter)
    while True:
        if tokens.is_empty():
            return
        token = next(tokens)
        if token.type not in (TokenType.PSEUDO_OP, TokenType.LABEL, TokenType.INSTRUCTION_NAME):
            continue
        # take_arguments is drained completely before the statement is handed out
        yield token, tuple(take_arguments(tokens))
def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
    """
    Consume the arguments of one statement and yield their values.

    Argument tokens are yielded, commas are skipped and the newline that ends the
    statement is consumed. Any other token, as well as the end of the token stream,
    ends the argument list without being consumed and without raising an error.

    :param tokens: A Peekable iterator over some Tokens
    """
    while True:
        upcoming = tokens.peek()
        if upcoming is None:
            # the token stream ended without a final newline
            break
        if upcoming.type == TokenType.ARGUMENT:
            yield next(tokens).value
        elif upcoming.type == TokenType.NEWLINE:
            next(tokens)
            break
        elif upcoming.type == TokenType.COMMA:
            next(tokens)
        else:
            # NOTE: unexpected tokens are deliberately left for the caller,
            # they are not reported as a parse error here
            break
class AssemblyFileLoader(ProgramLoader):
    """
    This class loads assembly files written by hand. It understands some assembler directives and supports most
    pseudo instructions. It does very little verification of source correctness.

    It also supports numbered jump targets and properly supports local and global scope (.globl assembly directive)

    The AssemblyFileLoader loads .asm, .S and .s files by default, and acts as a weak fallback to all other filetypes.
    """

    def parse(self) -> Program:
        """
        Tokenize and parse the file at self.source_path.

        :return: the parsed program, named after self.filename
        """
        # the token stream is consumed completely by parse_tokens while the file is still open
        with open(self.source_path, 'r') as f:
            return parse_tokens(self.filename, tokenize(f))

    @classmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Rate how well this loader fits the given file.

        It also acts as a weak fallback if no other loaders want to take the file.

        :param source_path: the path to the source file
        :return: 1 for files with an assembly file extension, 0.01 for everything else
        """
        # gcc recognizes these file extensions as assembly. So we will do too.
        if source_path.split('.')[-1] in ('asm', 'S', 's'):
            return 1
        return 0.01

    @classmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], T_ParserOpts]:
        """
        This loader takes no options.

        :param argv: the command line arguments
        :return: the untouched arguments and an empty options dict
        """
        # typing.List / typing.Tuple are used instead of list[str], which cannot be
        # evaluated in annotations before python 3.9
        return argv, {}

View File

@ -2,49 +2,49 @@ from typing import Dict, Union, Callable, Optional
from collections import defaultdict
from .privmodes import PrivModes
from .Exceptions import InstructionAccessFault
from ..helpers import to_signed
from ..colors import FMT_CSR, FMT_NONE
from .CSRConsts import CSR_NAME_TO_ADDR, MSTATUS_LEN_2, MSTATUS_OFFSETS
from ..types import UInt32
class CSR:
"""
This holds all Control and Status Registers (CSR)
"""
regs: Dict[int, int]
regs: Dict[int, UInt32]
"""
All Control and Status Registers are stored here
"""
virtual_regs: Dict[int, Callable[[], int]]
virtual_regs: Dict[int, Callable[[], UInt32]]
"""
list of virtual CSR registers, with values computed on read
"""
listeners: Dict[int, Callable[[int, int], None]]
listeners: Dict[int, Callable[[UInt32, UInt32], None]]
mstatus_cache: Dict[str, int]
mstatus_cache: Dict[str, UInt32]
mstatus_cache_dirty = True
def __init__(self):
self.regs = defaultdict(lambda: 0)
self.regs = defaultdict(lambda: UInt32(0))
self.listeners = defaultdict(lambda: (lambda x, y: None))
self.virtual_regs = dict()
self.mstatus_cache = dict()
# TODO: implement write masks (bitmasks which control writeable bits in registers
def set(self, addr: Union[str, int], val: int):
def set(self, addr: Union[str, int], val: Union[int, UInt32]):
addr = self._name_to_addr(addr)
if addr is None:
return
val = to_signed(val)
val = UInt32(val)
self.listeners[addr](self.regs[addr], val)
if addr == 0x300:
self.mstatus_cache_dirty = True
self.regs[addr] = val
def get(self, addr: Union[str, int]) -> int:
def get(self, addr: Union[str, int]) -> UInt32:
addr = self._name_to_addr(addr)
if addr is None:
raise RuntimeError(f"Invalid CSR name: {addr}!")
@ -52,7 +52,7 @@ class CSR:
return self.virtual_regs[addr]()
return self.regs[addr]
def set_listener(self, addr: Union[str, int], listener: Callable[[int, int], None]):
def set_listener(self, addr: Union[str, int], listener: Callable[[UInt32, UInt32], None]):
addr = self._name_to_addr(addr)
if addr is None:
print("unknown csr address name: {}".format(addr))
@ -60,7 +60,7 @@ class CSR:
self.listeners[addr] = listener
# mstatus properties
def set_mstatus(self, name: str, val: int):
def set_mstatus(self, name: str, val: UInt32):
"""
Set mstatus bits using this helper. mstatus is a 32 bit register, holding various machine status flags
Setting them by hand is super painful, so this helper allows you to set specific bits.
@ -79,7 +79,7 @@ class CSR:
new_val = erased | (val << off)
self.set('mstatus', new_val)
def get_mstatus(self, name) -> int:
def get_mstatus(self, name) -> UInt32:
if not self.mstatus_cache_dirty and name in self.mstatus_cache:
return self.mstatus_cache[name]
@ -94,7 +94,7 @@ class CSR:
return val
def callback(self, addr: Union[str, int]):
def inner(func: Callable[[int, int], None]):
def inner(func: Callable[[UInt32, UInt32], None]):
self.set_listener(addr, func)
return func
@ -121,7 +121,7 @@ class CSR:
if addr is None:
print("unknown csr address name: {}".format(addr))
def inner(func: Callable[[], int]):
def inner(func: Callable[[], UInt32]):
self.virtual_regs[addr] = func
return func

View File

@ -1,11 +1,9 @@
from dataclasses import dataclass
from typing import List, Dict, Tuple
from typing import List
from .Exceptions import *
from ..Exceptions import RiscemuBaseException
from ..Executable import MemoryFlags, LoadedMemorySection
from ..decoder import decode, RISCV_REGS, format_ins
from .types import ElfMemorySection
from ..helpers import FMT_PARSE, FMT_NONE, FMT_GREEN, FMT_BOLD
from ..types import MemoryFlags, Program, ProgramLoader, T_ParserOpts
FMT_ELF = FMT_GREEN + FMT_BOLD
@ -13,41 +11,53 @@ if typing.TYPE_CHECKING:
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import Section, SymbolTableSection
# This requires pyelftools package!
INCLUDE_SEC = ('.text', '.stack', '.bss', '.sdata', '.sbss')
class ElfExecutable:
sections: List['ElfLoadedMemorySection']
sections_by_name: Dict[str, 'ElfLoadedMemorySection']
symbols: Dict[str, int]
run_ptr: int
class ElfBinaryFileLoader(ProgramLoader):
"""
Loads compiled elf binaries (checks for the magic sequence 7f45 4c46)
def __init__(self, name: str):
self.sections = list()
self.sections_by_name = dict()
self.symbols = dict()
This loader respects local and global symbols.
"""
program: Program
def __init__(self, source_path: str, options: T_ParserOpts):
super().__init__(source_path, options)
self.program = Program(self.filename)
@classmethod
def can_parse(cls, source_path: str) -> float:
    """
    Report whether the file at ``source_path`` starts with the ELF magic bytes.

    :param source_path: path of the file to probe (opened in binary mode)
    :return: 1 if the first four bytes are ``7f 45 4c 46``, otherwise 0
    """
    elf_magic = b'\x7fELF'
    with open(source_path, 'rb') as binary:
        leading_bytes = binary.read(4)
    return 1 if leading_bytes == elf_magic else 0
@classmethod
def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]:
    """
    Split loader-specific options off the argument list.

    This loader takes no options of its own, so the arguments are handed back
    untouched together with an empty options dict.

    :param argv: the argument list to inspect
    :return: ``(argv, {})``
    """
    no_options = {}
    return argv, no_options
def parse(self) -> Program:
try:
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import Section, SymbolTableSection
with open(name, 'rb') as f:
print(FMT_ELF + "[ElfLoader] Loading elf executable from: {}".format(name) + FMT_NONE)
with open(self.source_path, 'rb') as f:
print(FMT_ELF + "[ElfLoader] Loading elf executable from: {}".format(self.source_path) + FMT_NONE)
self._read_elf(ELFFile(f))
except ImportError as e:
print(FMT_PARSE + "[ElfLoader] Cannot load elf files without PyElfTools package! You can install them using pip install pyelftools!" + FMT_NONE)
print(FMT_PARSE + "[ElfLoader] Cannot load elf files without PyElfTools package! You can install them "
"using pip install pyelftools!" + FMT_NONE)
raise e
return self.program
def _read_elf(self, elf: 'ELFFile'):
if not elf.header.e_machine == 'EM_RISCV':
raise InvalidElfException("Not a RISC-V elf file!")
if not elf.header.e_ident.EI_CLASS == 'ELFCLASS32':
raise InvalidElfException("Only 32bit executables are supported!")
self.run_ptr = elf.header.e_entry
from elftools.elf.sections import SymbolTableSection
for sec in elf.iter_sections():
if isinstance(sec, SymbolTableSection):
@ -57,29 +67,33 @@ class ElfExecutable:
if sec.name not in INCLUDE_SEC:
continue
self.add_sec(self._lms_from_elf_sec(sec, 'kernel'))
self._add_sec(self._lms_from_elf_sec(sec, self.filename))
def _lms_from_elf_sec(self, sec: 'Section', owner: str):
    """
    Build an ElfMemorySection from a section of the ELF file.

    The section contents are copied into a bytearray. If fewer bytes than
    ``sec.data_size`` were returned, the buffer is zero-padded up to that size.

    :param sec: the ELF section to convert
    :param owner: owner name passed on to the created memory section
    :return: an ElfMemorySection placed at ``sec.header.sh_addr``
    """
    is_code = sec.name in ('.text',)
    data = bytearray(sec.data())
    # Pad with zero bytes up to the declared size. The amount must be
    # (declared size - actual size); the reverse is negative and makes
    # bytearray() raise ValueError.
    missing = sec.data_size - len(data)
    if missing > 0:
        data += bytearray(missing)
    # NOTE(review): both flag arguments follow is_code; other call sites in this
    # package read them back as flags.read_only / flags.executable - confirm order.
    flags = MemoryFlags(is_code, is_code)
    print(FMT_ELF + "[ElfLoader] Section {} at: {:X}".format(sec.name, sec.header.sh_addr) + FMT_NONE)
    return ElfMemorySection(
        data, sec.name, self.program.context, owner, sec.header.sh_addr, flags
    )
def _parse_symtab(self, symtab: 'SymbolTableSection'):
self.symbols = {
sym.name: sym.entry.st_value for sym in symtab.iter_symbols() if sym.name
}
from elftools.elf.enums import ENUM_ST_VISIBILITY
def add_sec(self, new_sec: 'ElfLoadedMemorySection'):
for sec in self.sections:
for sym in symtab.iter_symbols():
if not sym.name:
continue
self.program.context.labels[sym.name] = sym.entry.st_value
# symbols with STB_GLOBAL binding are additionally registered as global labels
if sym.entry.st_info.bind == 'STB_GLOBAL':
self.program.global_labels.add(sym.name)
print(FMT_PARSE + "LOADED GLOBAL SYMBOL {}: {}".format(sym.name, sym.entry.st_value) + FMT_NONE)
def _add_sec(self, new_sec: 'ElfMemorySection'):
for sec in self.program.sections:
if sec.base < sec.end <= new_sec.base or sec.end > sec.base >= new_sec.end:
continue
else:
@ -88,78 +102,4 @@ class ElfExecutable:
) + FMT_NONE)
raise RuntimeError("Cannot load elf with overlapping sections!")
self.sections.append(new_sec)
self.sections_by_name[new_sec.name] = new_sec
class InvalidElfException(RiscemuBaseException):
def __init__(self, msg: str):
super().__init__()
self.msg = msg
def message(self):
return FMT_PARSE + "{}(\"{}\")".format(self.__class__.__name__, self.msg) + FMT_NONE
@dataclass(frozen=True)
class ElfInstruction:
name: str
args: List[int]
encoded: int
def get_imm(self, num: int) -> int:
return self.args[num]
def get_imm_reg(self, num: int) -> Tuple[int, int]:
return self.args[-1], self.args[-2]
def get_reg(self, num: int) -> str:
return RISCV_REGS[self.args[num]]
def __repr__(self) -> str:
if self.name == 'jal' and self.args[0] == 0:
return "j {}".format(self.args[1])
if self.name == 'addi' and self.args[2] == 0:
return "mv {}, {}".format(self.get_reg(0), self.get_reg(1))
if self.name == 'addi' and self.args[1] == 0:
return "li {}, {}".format(self.get_reg(0), self.args[2])
if self.name == 'ret' and len(self.args) == 0:
return "ret"
return format_ins(self.encoded, self.name)
# if self.name in ('lw', 'lh', 'lb', 'lbu', 'lhu', 'sw', 'sh', 'sb'):
# args = "{}, {}({})".format(
# RISCV_REGS[self.args[0]], self.args[2], RISCV_REGS[self.args[1]]
# )
# else:
# args = ", ".join(map(str, self.args))
# return "{:<8} {}".format(
# self.name,
# args
# )
class ElfLoadedMemorySection(LoadedMemorySection):
ins_cache: List[Optional[ElfInstruction]]
"""
A fast cache for accessing pre-decoded instructions
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.__setattr__('ins_cache', [None] * (self.size // 4))
def read_instruction(self, offset):
if self.ins_cache[offset//4] is not None:
return self.ins_cache[offset//4]
if not self.flags.executable:
print(FMT_PARSE + "Reading instruction from non-executable memory!" + FMT_NONE)
raise InstructionAccessFault(offset + self.base)
if offset % 4 != 0:
raise InstructionAddressMisalignedTrap(offset + self.base)
ins = ElfInstruction(*decode(self.content[offset:offset + 4]))
self.ins_cache[offset // 4] = ins
return ins
@property
def end(self):
return self.size + self.base
self.program.add_section(new_sec)

View File

@ -5,6 +5,10 @@ from .CSRConsts import MCAUSE_TRANSLATION
import typing
from .. import RiscemuBaseException
from ..colors import FMT_PARSE, FMT_NONE
from ..types import UInt32
if typing.TYPE_CHECKING:
from .ElfLoader import ElfInstruction
@ -26,7 +30,7 @@ class CpuTrap(BaseException):
The isInterrupt bit in the mstatus register
"""
mtval: int
mtval: UInt32
"""
contents of the mtval register
"""
@ -44,7 +48,7 @@ class CpuTrap(BaseException):
def __init__(self, code: int, mtval, type: CpuTrapType, priv: PrivModes = PrivModes.MACHINE):
self.interrupt = 0 if type == CpuTrapType.EXCEPTION else 1
self.code = code
self.mtval = mtval
self.mtval = UInt32(mtval)
self.priv = priv
self.type = type
@ -52,14 +56,17 @@ class CpuTrap(BaseException):
def mcause(self):
return (self.interrupt << 31) + self.code
def message(self) -> str:
return ""
def __repr__(self):
name = "Reserved interrupt({}, {})".format(self.interrupt, self.code)
if (self.interrupt, self.code) in MCAUSE_TRANSLATION:
name = MCAUSE_TRANSLATION[(self.interrupt, self.code)] + "({}, {})".format(self.interrupt, self.code)
return "{} {{priv={}, type={}, mtval={:x}}}".format(
name, self.priv.name, self.type.name, self.mtval
return "{} {{priv={}, type={}, mtval={:x}}} {}".format(
name, self.priv.name, self.type.name, self.mtval, self.message()
)
def __str__(self):
@ -89,3 +96,29 @@ class TimerInterrupt(CpuTrap):
class EcallTrap(CpuTrap):
    """
    Trap raised for an environment call issued in a given privilege mode.

    The trap code is ``mode.value + 8``, mtval is 0 and the trap type is
    CpuTrapType.EXCEPTION.
    """

    def __init__(self, mode: PrivModes):
        ecall_code = mode.value + 8
        super().__init__(ecall_code, 0, CpuTrapType.EXCEPTION)
class InvalidElfException(RiscemuBaseException):
    """
    Raised when an ELF file cannot be accepted by the loader.

    Carries a free-form description in ``msg``.
    """

    def __init__(self, msg: str):
        super().__init__()
        self.msg = msg

    def message(self):
        """
        Return ``ClassName("msg")`` wrapped in the FMT_PARSE / FMT_NONE markers.
        """
        described = "{}(\"{}\")".format(type(self).__name__, self.msg)
        return FMT_PARSE + described + FMT_NONE
class LoadAccessFault(CpuTrap):
    """
    Exception-type trap with code 5 whose mtval is the faulting address.

    Besides the address it records a description (``msg``), the access size
    (``size``) and the name of the operation that faulted (``op``, e.g. 'write').
    """

    def __init__(self, msg, addr, size, op):
        super().__init__(5, addr, CpuTrapType.EXCEPTION)
        self.msg, self.addr, self.size, self.op = msg, addr, size, op

    def message(self):
        """
        Describe the failed access: operation, address (8 hex digits), size and reason.
        """
        template = "(During {} at 0x{:08x} of size {}: {})"
        return template.format(self.op, self.addr, self.size, self.msg)

View File

@ -2,124 +2,74 @@
Loads a memory image with debug information into memory
"""
import json
from functools import lru_cache
from typing import Dict, List, Optional, TYPE_CHECKING
import os.path
from typing import List, Iterable
from .ElfLoader import ElfInstruction, ElfLoadedMemorySection, InstructionAccessFault, InstructionAddressMisalignedTrap
from .PrivMMU import PrivMMU
from ..Config import RunConfig
from ..Executable import LoadedMemorySection, MemoryFlags
from ..IO.IOModule import IOModule
from ..colors import FMT_ERROR, FMT_NONE, FMT_MEM
from ..decoder import decode
if TYPE_CHECKING:
pass
from .ElfLoader import ElfMemorySection
from .types import MemoryImageDebugInfos
from ..assembler import INSTRUCTION_SECTION_NAMES
from ..colors import FMT_NONE, FMT_PARSE
from ..helpers import get_section_base_name
from ..types import MemoryFlags, ProgramLoader, Program, T_ParserOpts
class MemoryImageMMU(PrivMMU):
io: List[IOModule]
data: bytearray
io_start: int
debug_info: Dict[str, Dict[str, Dict[str, str]]]
class MemoryImageLoader(ProgramLoader):
def __init__(self, file_name: str, io_start: int = 0xFF0000):
super(MemoryImageMMU, self).__init__(conf=RunConfig())
@classmethod
def can_parse(cls, source_path: str) -> float:
    """
    Report whether ``source_path`` looks like a memory image.

    Only the text after the last ``.`` in the path is examined (the whole path
    if it contains no dot); the file itself is not opened.

    :return: 1 if that text is ``img``, otherwise 0
    """
    extension = source_path.rsplit('.', 1)[-1]
    return 1 if extension == 'img' else 0
with open(file_name, 'rb') as memf:
data = memf.read()
with open(file_name + '.dbg', 'r') as dbgf:
debug_info: Dict = json.load(dbgf)
@classmethod
def get_options(cls, argv: list[str]) -> [List[str], T_ParserOpts]:
    """
    Split loader-specific options off the argument list.

    No options are extracted here: the arguments come back unchanged, paired
    with an empty options dict.

    :param argv: the argument list to inspect
    :return: ``(argv, {})``
    """
    no_options = {}
    return argv, no_options
self.data = bytearray(data)
# TODO: super wasteful memory allocation happening here
if len(data) < io_start:
self.data += bytearray(io_start - len(data))
self.debug_info = debug_info
self.io_start = io_start
self.io = list()
def parse(self) -> Iterable[Program]:
if 'debug' not in self.options:
yield self.parse_no_debug()
return
def get_entrypoint(self):
try:
start = self.debug_info['symbols']['kernel'].get('_start', None)
if start is not None:
return start
return self.debug_info['symbols']['kernel'].get('_ftext')
except KeyError:
print(FMT_ERROR + '[MMU] cannot find kernel entry in debug information! Falling back to 0x100' + FMT_NONE)
return 0x100
with open(self.options.get('debug'), 'r') as debug_file:
debug_info = MemoryImageDebugInfos.load(debug_file.read())
@lru_cache(maxsize=2048)
def read_ins(self, addr: int) -> ElfInstruction:
if addr >= self.io_start:
raise InstructionAccessFault(addr)
if addr % 4 != 0:
raise InstructionAddressMisalignedTrap(addr)
with open(self.source_path, 'rb') as source_file:
data: bytearray = bytearray(source_file.read())
return ElfInstruction(*decode(self.data[addr:addr + 4]))
for name, sections in debug_info.sections.items():
program = Program(name)
def read(self, addr: int, size: int) -> bytearray:
if addr < 0x100:
pc = self.cpu.pc
text_sec = self.get_sec_containing(pc)
print(FMT_ERROR + "[MMU] possible null dereference (read {:x}) from (pc={:x},sec={},rel={:x})".format(
addr, pc, text_sec.owner + ':' + text_sec.name, pc - text_sec.base
) + FMT_NONE)
if addr >= self.io_start:
return self.io_at(addr).read(addr, size)
return self.data[addr: addr + size]
for sec_name, (start, size) in sections.items():
if program.base is None:
program.base = start
def write(self, addr: int, size: int, data):
if addr < 0x100:
pc = self.cpu.pc
text_sec = self.get_sec_containing(pc)
print(FMT_ERROR + "[MMU] possible null dereference (write {:x}) from (pc={:x},sec={},rel={:x})".format(
addr, pc, text_sec.owner + ':' + text_sec.name, pc - text_sec.base
) + FMT_NONE)
#in_code_sec = get_section_base_name(sec_name) in INSTRUCTION_SECTION_NAMES
program.add_section(
ElfMemorySection(
data[start:start+size], sec_name, program.context,
name, start, MemoryFlags(False, True)
)
)
if addr >= self.io_start:
return self.io_at(addr).write(addr, data, size)
self.data[addr:addr + size] = data[0:size]
program.context.labels.update(debug_info.symbols.get(name, dict()))
program.global_labels.update(debug_info.globals.get(name, set()))
def io_at(self, addr) -> IOModule:
for mod in self.io:
if mod.contains(addr):
return mod
raise InstructionAccessFault(addr)
yield program
def add_io(self, io: IOModule):
self.io.append(io)
def parse_no_debug(self) -> Program:
print(FMT_PARSE + "[MemoryImageLoader] Warning: loading memory image without debug information!" + FMT_NONE)
def __repr__(self):
return "MemoryImageMMU()"
with open(self.source_path, 'rb') as source_file:
data: bytes = source_file.read()
@lru_cache(maxsize=32)
def get_sec_containing(self, addr: int) -> Optional[LoadedMemorySection]:
next_sec = len(self.data)
for sec_addr, name in reversed(self.debug_info['sections'].items()):
if addr >= int(sec_addr):
owner, name = name.split(':')
base = int(sec_addr)
size = next_sec - base
flags = MemoryFlags('.text' in name, '.text' in name)
return ElfLoadedMemorySection(name, base, size, self.data[base:next_sec], flags, owner)
else:
next_sec = int(sec_addr)
p = Program(self.filename)
p.add_section(ElfMemorySection(
bytearray(data), '.text', p.context, p.name, 0, MemoryFlags(False, True)
))
return p
def translate_address(self, addr: int):
sec = self.get_sec_containing(addr)
if sec.name == '.empty':
return "<empty>"
symbs = self.debug_info['symbols'][sec.owner]
for sym, val in reversed(symbs.items()):
if addr >= val:
return "{}{:+x} ({}:{})".format(sym, addr - val, sec.owner, sec.name)
return "{}:{}{:+x}".format(sec.owner, sec.name, addr - sec.base)
def symbol(self, symb: str):
print(FMT_MEM + "Looking up symbol {}".format(symb))
for owner, symbs in self.debug_info['symbols'].items():
if symb in symbs:
print(" Hit in {}: {} = {}".format(owner, symb, symbs[symb]))
print(FMT_NONE, end="")
@classmethod
def instantiate(cls, source_path: str, options: T_ParserOpts) -> 'ProgramLoader':
    """
    Create a MemoryImageLoader for ``source_path``.

    If a file named ``source_path + '.dbg'`` exists, its path is added to the
    options under the ``debug`` key; otherwise the options are passed through
    unchanged.
    """
    debug_path = source_path + '.dbg'
    if not os.path.isfile(debug_path):
        return MemoryImageLoader(source_path, options)
    with_debug = dict(**options, debug=debug_path)
    return MemoryImageLoader(source_path, with_debug)

View File

@ -3,20 +3,23 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import sys
import time
from riscemu.CPU import *
from .CSR import CSR
from .ElfLoader import ElfBinaryFileLoader
from .Exceptions import *
from .ImageLoader import MemoryImageLoader
from .PrivMMU import PrivMMU
from .PrivRV32I import PrivRV32I
from .privmodes import PrivModes
from ..IO import TextIO
from ..IO.TextIO import TextIO
from ..instructions import RV32A, RV32M
from ..types import Program, UInt32
if typing.TYPE_CHECKING:
from riscemu import Executable, LoadedExecutable, LoadedInstruction
from riscemu.instructions.InstructionSet import InstructionSet
from riscemu.instructions.instruction_set import InstructionSet
class PrivCPU(CPU):
@ -25,7 +28,7 @@ class PrivCPU(CPU):
It should support M and U Mode, but no U-Mode Traps.
This allows us to
This is meant to emulate whole operating systems.
"""
csr: CSR
@ -38,105 +41,98 @@ class PrivCPU(CPU):
controls the resolution of the time csr register (in nanoseconds)
"""
INS_XLEN = 4
pending_traps: List[CpuTrap]
"""
Size of an instruction in memory. Should be 4, but since our loading code is shit, instruction take up
the equivalent of "1 byte" (this is actually impossible)
A list of traps which are pending to be handled
"""
def __init__(self, conf, mmu: PrivMMU):
super().__init__(conf, [PrivRV32I, RV32M, RV32A])
def __init__(self, conf):
super().__init__(PrivMMU(), [PrivRV32I, RV32M, RV32A], conf)
# start in machine mode
self.mode: PrivModes = PrivModes.MACHINE
mmu.set_cpu(self)
self.pc = mmu.get_entrypoint()
self.mmu = mmu
if hasattr(self.mmu, 'add_io'):
self.mmu.add_io(TextIO.TextIO(0xff0000, 64))
self.syscall_int = None
self.launch_debug = False
self.pending_traps: List[CpuTrap] = list()
self.exit_code = 0
self._time_start = 0
self._time_timecmp = 0
self._time_timecmp = UInt32(0)
self._time_interrupt_enabled = False
# performance counters
self._perf_counters = list()
# add TextIO
io = TextIO(0xFF0000, 64)
self.mmu.load_section(io, True)
# init csr
self._init_csr()
def _run(self, verbose=False):
self.TIME_RESOLUTION_NS = int(self.TIME_RESOLUTION_NS * conf.slowdown)
def run(self, verbose=False):
if self.pc <= 0:
return False
ins = None
launch_debug = False
try:
while not self.exit:
while not self.halted:
self.step(verbose)
except RiscemuBaseException as ex:
if isinstance(ex, LaunchDebuggerException):
self.launch_debug = True
launch_debug = True
self.pc += self.INS_XLEN
if self.exit:
if self.halted:
print()
print(FMT_CPU + "Program exited with code {}".format(self.exit_code) + FMT_NONE)
print(FMT_CPU + "[CPU] System halted with code {}".format(self.exit_code) + FMT_NONE)
sys.exit(self.exit_code)
elif self.launch_debug:
self.launch_debug = False
launch_debug_session(self, self.mmu, self.regs,
"Launching debugger:")
if not self.active_debug:
self._run(verbose)
elif launch_debug:
launch_debug_session(self)
if not self.debugger_active:
self.run(verbose)
else:
print()
print(FMT_CPU + "Program stopped without exiting - perhaps you stopped the debugger?" + FMT_NONE)
print(FMT_CPU + "[CPU] System stopped without halting - perhaps you stopped the debugger?" + FMT_NONE)
def load(self, e: riscemu.Executable):
raise NotImplementedError("Not supported!")
def run_loaded(self, le: 'riscemu.LoadedExecutable'):
raise NotImplementedError("Not supported!")
def get_tokenizer(self, tokenizer_input):
raise NotImplementedError("Not supported!")
def run(self, verbose: bool = False):
def launch(self, program: Optional[Program] = None, verbose: bool = False):
print(FMT_CPU + '[CPU] Started running from 0x{:08X} ({})'.format(self.pc, "kernel") + FMT_NONE)
self._time_start = time.perf_counter_ns() // self.TIME_RESOLUTION_NS
self._run(self.conf.verbosity > 1)
self.run(self.conf.verbosity > 1 or verbose)
def load_program(self, program: Program):
    """
    Load ``program`` via the parent implementation.

    A program named 'kernel' additionally sets the program counter to its
    entrypoint before it is loaded.
    """
    is_kernel = program.name == 'kernel'
    if is_kernel:
        self.pc = program.entrypoint
    super().load_program(program)
def _init_csr(self):
# set up CSR
self.csr = CSR()
self.csr.set('mhartid', 0) # core id
self.csr.set('mhartid', UInt32(0)) # core id
# TODO: set correct value
self.csr.set('mimpid', 0) # implementation id
self.csr.set('mimpid', UInt32(0)) # implementation id
# set mxl to 1 (32 bit) and set bits for i and m isa
self.csr.set('misa', (1 << 30) + (1 << 8) + (1 << 12)) # available ISA
self.csr.set('misa', UInt32((1 << 30) + (1 << 8) + (1 << 12))) # available ISA
# CSR write callbacks:
@self.csr.callback('halt')
def halt(old: int, new: int):
def halt(old: UInt32, new: UInt32):
if new != 0:
self.exit = True
self.exit_code = new
@self.csr.callback('mstatus')
def mstatus(old: int, new: int):
pass
self.halted = True
self.exit_code = new.value
@self.csr.callback('mtimecmp')
def mtimecmp(old, new):
def mtimecmp(old: UInt32, new: UInt32):
self._time_timecmp = (self.csr.get('mtimecmph') << 32) + new
self._time_interrupt_enabled = True
@self.csr.callback('mtimecmph')
def mtimecmph(old, new):
def mtimecmph(old: UInt32, new: UInt32):
self._time_timecmp = (new << 32) + self.csr.get('mtimecmp')
self._time_interrupt_enabled = True
@ -144,11 +140,11 @@ class PrivCPU(CPU):
@self.csr.virtual_register('time')
def get_time():
return (time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start) & (2 ** 32 - 1)
return UInt32(time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start)
@self.csr.virtual_register('timeh')
def get_timeh():
return (time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start) >> 32
return UInt32((time.perf_counter_ns() // self.TIME_RESOLUTION_NS - self._time_start) >> 32)
# add minstret and mcycle counters
@ -163,7 +159,7 @@ class PrivCPU(CPU):
self._timer_step()
self._check_interrupt()
ins = self.mmu.read_ins(self.pc)
if verbose and self.mode == PrivModes.USER:
if verbose and (self.mode == PrivModes.USER or self.conf.verbosity > 4):
print(FMT_CPU + " Running 0x{:08X}:{} {}".format(self.pc, FMT_NONE, ins))
self.run_instruction(ins)
self.pc += self.INS_XLEN
@ -175,6 +171,7 @@ class PrivCPU(CPU):
self.mmu.translate_address(self.pc),
self.pc
) + FMT_NONE)
breakpoint()
if self.conf.debug_on_exception:
raise LaunchDebuggerException()
self.pc += self.INS_XLEN
@ -190,10 +187,11 @@ class PrivCPU(CPU):
if not (len(self.pending_traps) > 0 and self.csr.get_mstatus('mie')):
return
# select best interrupt
# TODO: actually select based on the official ranking
# FIXME: actually select based on the official ranking
trap = self.pending_traps.pop() # use the most recent trap
if self.conf.verbosity > 0:
print(FMT_CPU + "[CPU] taking trap {}!".format(trap) + FMT_NONE)
self.regs.dump_reg_a()
if trap.priv != PrivModes.MACHINE:
print(FMT_CPU + "[CPU] Trap not targeting machine mode encountered! - undefined behaviour!" + FMT_NONE)
@ -204,18 +202,18 @@ class PrivCPU(CPU):
self.csr.set_mstatus('mpie', self.csr.get_mstatus('mie'))
self.csr.set_mstatus('mpp', self.mode.value)
self.csr.set_mstatus('mie', 0)
self.csr.set_mstatus('mie', UInt32(0))
self.csr.set('mcause', trap.mcause)
self.csr.set('mepc', self.pc - self.INS_XLEN)
self.csr.set('mtval', trap.mtval)
self.mode = trap.priv
mtvec = self.csr.get('mtvec')
if mtvec & 0b11 == 0:
self.pc = mtvec
self.pc = mtvec.value
if mtvec & 0b11 == 1:
self.pc = (mtvec & 0b11111111111111111111111111111100) + (trap.code * 4)
self.pc = ((mtvec & 0b11111111111111111111111111111100) + (trap.code * 4)).value
self.record_perf_profile()
if len(self._perf_counters) % 100 == 0:
if len(self._perf_counters) > 100:
self.show_perf()
def show_perf(self):
@ -231,11 +229,6 @@ class PrivCPU(CPU):
continue
cps = (cycle - cycled) / (time_ns - timed) * 1000000000
# print(" {:03d} cycles in {:08d}ns ({:.2f} cycles/s)".format(
# cycle - cycled,
# time_ns - timed,
# cps
# ))
cycled = cycle
timed = time_ns
cps_list.append(cps)
@ -244,3 +237,9 @@ class PrivCPU(CPU):
def record_perf_profile(self):
self._perf_counters.append((time.perf_counter_ns(), self.cycle))
@classmethod
def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
    """
    Return the program loader classes available to this CPU, in fixed order:
    assembly files, memory images, ELF binaries.
    """
    available = [AssemblyFileLoader, MemoryImageLoader, ElfBinaryFileLoader]
    return available

View File

@ -1,42 +1,43 @@
from .types import ElfMemorySection
from ..MMU import *
from abc import abstractmethod
import typing
from .ElfLoader import ElfExecutable
if typing.TYPE_CHECKING:
from .PrivCPU import PrivCPU
class PrivMMU(MMU):
cpu: 'PrivCPU'
@abstractmethod
def get_entrypoint(self) -> int:
raise
def get_sec_containing(self, addr: T_AbsoluteAddress) -> MemorySection:
# try to get an existing section
existing_sec = super().get_sec_containing(addr)
def set_cpu(self, cpu: 'PrivCPU'):
self.cpu = cpu
if existing_sec is not None:
return existing_sec
def translate_address(self, addr: int):
return ""
# get section preceding empty space at addr
sec_before = next((sec for sec in reversed(self.sections) if sec.end < addr), None)
# get sec succeeding empty space at addr
sec_after = next((sec for sec in self.sections if sec.base > addr), None)
# calc start and end of "free" space
prev_sec_end = 0 if sec_before is None else sec_before.end
next_sec_start = 0x7FFFFFFF if sec_after is None else sec_after.base
class LoadedElfMMU(PrivMMU):
def __init__(self, elf: ElfExecutable):
super().__init__(conf=RunConfig())
self.entrypoint = elf.symbols['_start']
# start at the end of the prev section, or current address - 0xFFFF (aligned to 16 byte boundary)
start = max(prev_sec_end, align_addr(addr - 0xFFFF, 16))
# end at the start of the next section, or address + 0xFFFF (aligned to 16 byte boundary)
end = min(next_sec_start, align_addr(addr + 0xFFFF, 16))
self.binaries.append(elf)
for sec in elf.sections:
self.sections.append(sec)
sec = ElfMemorySection(bytearray(end - start), '.empty', self.global_instruction_context(), '', start, MemoryFlags(False, True))
self.sections.append(sec)
self._update_state()
def load_bin(self, exe: Executable) -> LoadedExecutable:
raise NotImplementedError("This is a privMMU, it's initialized with a single ElfExecutable!")
return sec
def allocate_section(self, name: str, req_size: int, flag: MemoryFlags):
raise NotImplementedError("Not supported!")
def get_entrypoint(self):
return self.entrypoint
def global_instruction_context(self) -> InstructionContext:
    """
    Create a fresh InstructionContext whose ``global_symbol_dict`` is this
    MMU's ``global_symbols`` object (shared, not copied).
    """
    fresh_context = InstructionContext()
    fresh_context.global_symbol_dict = self.global_symbols
    return fresh_context

View File

@ -5,7 +5,7 @@ SPDX-License-Identifier: MIT
"""
from ..instructions.RV32I import *
from ..Exceptions import INS_NOT_IMPLEMENTED
from riscemu.types.exceptions import INS_NOT_IMPLEMENTED
from .Exceptions import *
from .privmodes import PrivModes
from ..colors import FMT_CPU, FMT_NONE
@ -21,7 +21,7 @@ class PrivRV32I(RV32I):
This is an extension of RV32I, written for the PrivCPU class
"""
def instruction_csrrw(self, ins: 'LoadedInstruction'):
def instruction_csrrw(self, ins: 'Instruction'):
rd, rs, csr_addr = self.parse_crs_ins(ins)
old_val = None
if rd != 'zero':
@ -34,7 +34,7 @@ class PrivRV32I(RV32I):
if old_val is not None:
self.regs.set(rd, old_val)
def instruction_csrrs(self, ins: 'LoadedInstruction'):
def instruction_csrrs(self, ins: 'Instruction'):
rd, rs, csr_addr = self.parse_crs_ins(ins)
if rs != 'zero':
# oh no, this should not happen!
@ -44,14 +44,13 @@ class PrivRV32I(RV32I):
old_val = self.cpu.csr.get(csr_addr)
self.regs.set(rd, old_val)
def instruction_csrrc(self, ins: 'LoadedInstruction'):
def instruction_csrrc(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_csrrsi(self, ins: 'LoadedInstruction'):
def instruction_csrrsi(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_csrrwi(self, ins: 'LoadedInstruction'):
def instruction_csrrwi(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
rd, imm, addr = ins.get_reg(0), ins.get_imm(1), ins.get_imm(2)
if rd != 'zero':
@ -61,11 +60,10 @@ class PrivRV32I(RV32I):
self.cpu.csr.assert_can_write(self.cpu.mode, addr)
self.cpu.csr.set(addr, imm)
def instruction_csrrci(self, ins: 'LoadedInstruction'):
def instruction_csrrci(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_mret(self, ins: 'LoadedInstruction'):
def instruction_mret(self, ins: 'Instruction'):
if self.cpu.mode != PrivModes.MACHINE:
print("MRET not inside machine level code!")
raise IllegalInstructionTrap(ins)
@ -77,10 +75,10 @@ class PrivRV32I(RV32I):
self.cpu.mode = PrivModes(mpp)
# restore pc
mepc = self.cpu.csr.get('mepc')
self.cpu.pc = mepc - self.cpu.INS_XLEN
self.cpu.pc = (mepc - self.cpu.INS_XLEN).value
if self.cpu.conf.verbosity > 0:
sec = self.mmu.get_sec_containing(mepc)
sec = self.mmu.get_sec_containing(mepc.value)
if sec is not None:
print(FMT_CPU + "[CPU] returning to mode {} in {} (0x{:x})".format(
PrivModes(mpp).name,
@ -90,78 +88,83 @@ class PrivRV32I(RV32I):
if self.cpu.conf.verbosity > 1:
self.regs.dump_reg_a()
def instruction_uret(self, ins: 'LoadedInstruction'):
def instruction_uret(self, ins: 'Instruction'):
raise IllegalInstructionTrap(ins)
def instruction_sret(self, ins: 'LoadedInstruction'):
def instruction_sret(self, ins: 'Instruction'):
raise IllegalInstructionTrap(ins)
def instruction_scall(self, ins: 'LoadedInstruction'):
def instruction_scall(self, ins: 'Instruction'):
"""
Overwrite the scall from userspace RV32I
"""
raise EcallTrap(self.cpu.mode)
def instruction_beq(self, ins: 'LoadedInstruction'):
def instruction_beq(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 == rs2:
self.pc += dst - 4
self.pc += dst.value - 4
def instruction_bne(self, ins: 'LoadedInstruction'):
def instruction_bne(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 != rs2:
self.pc += dst - 4
self.pc += dst.value - 4
def instruction_blt(self, ins: 'LoadedInstruction'):
def instruction_blt(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 < rs2:
self.pc += dst - 4
self.pc += dst.value - 4
def instruction_bge(self, ins: 'LoadedInstruction'):
def instruction_bge(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 >= rs2:
self.pc += dst - 4
self.pc += dst.value - 4
def instruction_bltu(self, ins: 'LoadedInstruction'):
def instruction_bltu(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
if rs1 < rs2:
self.pc += dst - 4
self.pc += dst.value - 4
def instruction_bgeu(self, ins: 'LoadedInstruction'):
def instruction_bgeu(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
if rs1 >= rs2:
self.pc += dst - 4
self.pc += dst.value - 4
# technically deprecated
def instruction_j(self, ins: 'LoadedInstruction'):
def instruction_j(self, ins: 'Instruction'):
raise NotImplementedError("Should never be reached!")
def instruction_jal(self, ins: 'LoadedInstruction'):
def instruction_jal(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
addr = ins.get_imm(1)
if reg == 'ra' and self.cpu.mode == PrivModes.USER and self.cpu.conf.verbosity > 1:
print(FMT_CPU + 'Jumping to {} (0x{:x})'.format(
if reg == 'ra' and (
(self.cpu.mode == PrivModes.USER and self.cpu.conf.verbosity > 1) or
(self.cpu.conf.verbosity > 3)
):
print(FMT_CPU + 'Jumping from 0x{:x} to {} (0x{:x})'.format(
self.pc,
self.mmu.translate_address(self.pc + addr),
self.pc + addr
) + FMT_NONE)
self.regs.set(reg, self.pc)
self.regs.dump_reg_a()
self.regs.set(reg, Int32(self.pc))
self.pc += addr - 4
def instruction_jalr(self, ins: 'LoadedInstruction'):
def instruction_jalr(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
rd, rs, imm = self.parse_rd_rs_imm(ins)
self.regs.set(rd, self.pc)
self.pc = rs + imm - 4
self.regs.set(rd, Int32(self.pc))
self.pc = rs.value + imm.value - 4
def instruction_sbreak(self, ins: 'LoadedInstruction'):
def instruction_sbreak(self, ins: 'Instruction'):
raise LaunchDebuggerException()
def parse_crs_ins(self, ins: 'LoadedInstruction'):
def parse_crs_ins(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
return ins.get_reg(0), ins.get_reg(1), ins.get_imm(2)
def parse_mem_ins(self, ins: 'LoadedInstruction') -> Tuple[str, int]:
def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, int]:
ASSERT_LEN(ins.args, 3)
addr = self.get_reg_content(ins, 1) + ins.get_imm(2)
reg = ins.get_reg(0)

View File

@ -1,7 +1,6 @@
from .PrivCPU import PrivCPU, RunConfig
from .ImageLoader import MemoryImageMMU
from .PrivMMU import LoadedElfMMU
from .ElfLoader import ElfExecutable
from riscemu import RunConfig
from riscemu.types import Program
from .PrivCPU import PrivCPU
import sys
@ -10,26 +9,29 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(description='RISC-V privileged architecture emulator', prog='riscemu')
parser.add_argument('--kernel', type=str, help='Kernel elf loaded with user programs', nargs='?')
parser.add_argument('--image', type=str, help='Memory image containing kernel', nargs='?')
parser.add_argument('--debug-exceptions', help='Launch the interactive debugger when an exception is generated', action='store_true')
parser.add_argument('source', type=str,
help='Compiled RISC-V ELF file or memory image containing compiled RISC-V ELF files', nargs='+')
parser.add_argument('--debug-exceptions', help='Launch the interactive debugger when an exception is generated',
action='store_true')
parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count', default=0)
parser.add_argument('-v', '--verbose', help="Verbosity level (can be used multiple times)", action='count',
default=0)
parser.add_argument('--slowdown', help="Slow down the emulated CPU clock by a factor", type=float, default=1)
args = parser.parse_args()
mmu = None
if args.kernel is not None:
mmu = LoadedElfMMU(ElfExecutable(args.kernel))
elif args.image is not None:
mmu = MemoryImageMMU(args.image)
if mmu is None:
print("You must specify one of --kernel or --image for running in privilege mode!")
sys.exit(1)
cpu = PrivCPU(RunConfig(verbosity=args.verbose, debug_on_exception=args.debug_exceptions), mmu)
cpu.run()
cpu = PrivCPU(RunConfig(verbosity=args.verbose, debug_on_exception=args.debug_exceptions, slowdown=args.slowdown))
for source_path in args.source:
loader = max((loader for loader in cpu.get_loaders()), key=lambda l: l.can_parse(source_path))
argv, opts = loader.get_options(sys.argv)
program = loader.instantiate(source_path, opts).parse()
if isinstance(program, Program):
cpu.load_program(program)
else:
program_iter = program
for program in program_iter:
cpu.load_program(program)
cpu.launch(verbose=args.verbose > 4)

147
riscemu/priv/types.py Normal file
View File

@ -0,0 +1,147 @@
import json
from collections import defaultdict
from dataclasses import dataclass
from functools import lru_cache
from typing import Tuple, Dict, Set
from riscemu.colors import FMT_NONE, FMT_PARSE
from riscemu.decoder import format_ins, RISCV_REGS, decode
from riscemu.priv.Exceptions import InstructionAccessFault, InstructionAddressMisalignedTrap, LoadAccessFault
from riscemu.types import Instruction, InstructionContext, T_RelativeAddress, MemoryFlags, T_AbsoluteAddress, \
BinaryDataMemorySection
@dataclass(frozen=True)
class ElfInstruction(Instruction):
    """
    An instruction decoded from binary (ELF) code.

    ``args`` holds the decoded integer operands and ``encoded`` the raw
    instruction word the operands were decoded from.
    """
    name: str
    args: Tuple[int]
    encoded: int

    def get_imm(self, num: int) -> int:
        """Return operand number ``num`` as an immediate value."""
        return self.args[num]

    def get_imm_reg(self, num: int) -> Tuple[int, int]:
        """
        Return the last and the second-to-last operand (in that order).

        Note that ``num`` is not used; the pair is always taken from the end of ``args``.
        """
        last = self.args[-1]
        second_to_last = self.args[-2]
        return last, second_to_last

    def get_reg(self, num: int) -> str:
        """Return the register name for the register index stored in operand ``num``."""
        register_index = self.args[num]
        return RISCV_REGS[register_index]

    def __repr__(self) -> str:
        """Render the instruction, preferring the j/mv/li/ret short forms where they apply."""
        mnemonic = self.name
        operands = self.args
        if mnemonic == 'jal':
            if operands[0] == 0:
                return "j {}".format(operands[1])
        elif mnemonic == 'addi':
            # adding zero is a register move
            if operands[2] == 0:
                return "mv {}, {}".format(self.get_reg(0), self.get_reg(1))
            # adding to register index 0 is an immediate load
            if operands[1] == 0:
                return "li {}, {}".format(self.get_reg(0), operands[2])
        elif mnemonic == 'ret':
            if len(operands) == 0:
                return "ret"
        return format_ins(self.encoded, mnemonic)
class ElfMemorySection(BinaryDataMemorySection):
    """
    A binary memory section whose contents can be decoded into instructions.

    Decoded instructions are cached per instance; the cache is dropped on every write.
    """

    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int,
                 flags: MemoryFlags):
        super().__init__(data, name, context, owner, base=base, flags=flags)
        # one cache slot per 4-byte instruction word in this section
        cache_slots = self.size // 4
        cached_reader = lru_cache(maxsize=cache_slots)(self.read_ins)
        self.read_ins = cached_reader

    def read_ins(self, offset):
        """
        Decode the instruction located ``offset`` bytes into this section.

        :raises InstructionAccessFault: if the section is not executable
        :raises InstructionAddressMisalignedTrap: if ``offset`` is not a multiple of 4
        """
        if not self.flags.executable:
            print(FMT_PARSE + "Reading instruction from non-executable memory!" + FMT_NONE)
            raise InstructionAccessFault(offset + self.base)
        if offset % 4 != 0:
            raise InstructionAddressMisalignedTrap(offset + self.base)
        raw_word = self.data[offset:offset + 4]
        return ElfInstruction(*decode(raw_word))

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Write ``data`` into the section and invalidate the decoded-instruction cache.

        :raises LoadAccessFault: if the section is read-only
        """
        if self.flags.read_only:
            raise LoadAccessFault('read-only section', offset + self.base, size, 'write')
        # cached decodings may be stale after the write
        self.read_ins.cache_clear()
        return super().write(offset, size, data)

    @property
    def end(self):
        """The first address after this section."""
        return self.size + self.base
class MemoryImageDebugInfos:
    """
    Debug information (sections, symbols and global symbols) for the programs
    contained in a memory image.

    Instances round-trip through JSON using :meth:`serialize` and :meth:`load`.
    """

    VERSION = '1.0.0'
    """
    Schema version
    """

    # quoted so the alias does not have to be resolvable when the class body is evaluated
    base: 'T_AbsoluteAddress' = 0
    """
    The base address where the image starts. Defaults to zero.
    """

    sections: Dict[str, Dict[str, Tuple[int, int]]]
    """
    This dictionary maps a program and section to (start address, section length)
    """

    symbols: Dict[str, Dict[str, int]]
    """
    This dictionary maps a program and a symbol to a value
    """

    globals: Dict[str, Set[str]]
    """
    This dictionary contains the list of all global symbols of a given program
    """

    def __init__(self,
                 sections: Dict[str, Dict[str, Tuple[int, int]]],
                 symbols: Dict[str, Dict[str, int]],
                 globals: Dict[str, Set[str]],
                 base: int = 0
                 ):
        """
        :param sections: program name -> section name -> (start address, section length)
        :param symbols: program name -> symbol name -> value
        :param globals: program name -> collection of global symbol names
        :param base: the base address of the image
        """
        self.sections = sections
        self.symbols = symbols
        self.globals = globals
        # JSON has no set type, so after load() the values arrive as lists.
        # The conversion happens in place so that a defaultdict handed in by
        # builder() keeps its default factory.
        for name in globals:
            globals[name] = set(globals[name])
        self.base = base

    def serialize(self) -> str:
        """
        Serialize the debug information (including the schema VERSION) to a JSON string.

        :return: a JSON document that :meth:`load` accepts
        """
        def to_json_compatible(obj: object):
            # json.dumps expects the ``default`` hook to return a JSON-encodable *object*,
            # not an already encoded string. Returning a string would embed the value as a
            # quoted string, which load() could not turn back into the original set.
            # dict subclasses (defaultdict) and tuples are encoded natively and never reach
            # this hook.
            if isinstance(obj, (set, frozenset)):
                try:
                    # sorted for a reproducible output
                    return sorted(obj)
                except TypeError:
                    return list(obj)
            return "<<unserializable {}>>".format(getattr(obj, '__qualname__', '{unknown}'))

        return json.dumps(
            dict(
                sections=self.sections,
                symbols=self.symbols,
                globals=self.globals,
                base=self.base,
                VERSION=self.VERSION
            ),
            default=to_json_compatible
        )

    @classmethod
    def load(cls, serialized_str: str) -> 'MemoryImageDebugInfos':
        """
        Restore an instance from the output of :meth:`serialize`.

        :param serialized_str: the JSON document
        :raises RuntimeError: if the document has no VERSION or its major version differs
        """
        json_obj: dict = json.loads(serialized_str)

        if 'VERSION' not in json_obj:
            raise RuntimeError("Unknown MemoryImageDebugInfo version!")

        version: str = json_obj.pop('VERSION')

        # compare major version
        if version != cls.VERSION and version.split('.')[0] != cls.VERSION.split('.')[0]:
            raise RuntimeError(
                "Unknown MemoryImageDebugInfo version! This emulator expects version {}, debug info version {}".format(
                    cls.VERSION, version
                )
            )

        return cls(**json_obj)

    @classmethod
    def builder(cls) -> 'MemoryImageDebugInfos':
        """
        Create an empty instance backed by defaultdicts, so entries can be added
        without creating the per-program containers first.
        """
        return cls(
            defaultdict(dict), defaultdict(dict), defaultdict(set)
        )

View File

@ -1,28 +1,27 @@
"""
RiscEmu (c) 2021 Anton Lydike
RiscEmu (c) 2021-2022 Anton Lydike
SPDX-License-Identifier: MIT
"""
from .Config import RunConfig
from .helpers import *
from collections import defaultdict
from .Exceptions import InvalidRegisterException
from .helpers import *
if typing.TYPE_CHECKING:
from .types import Int32
class Registers:
"""
Represents a bunch of registers
"""
def __init__(self, conf: RunConfig):
"""
Initialize the register configuration, respecting the RunConfig conf
:param conf: The RunConfig
"""
self.vals = defaultdict(lambda: 0)
def __init__(self):
from .types import Int32
self.vals = defaultdict(lambda: Int32(0))
self.last_set = None
self.last_read = None
self.conf = conf
def dump(self, full=False):
"""
@ -86,7 +85,7 @@ class Registers:
return FMT_GRAY + txt + FMT_NONE
return txt
def set(self, reg, val, mark_set=True) -> bool:
def set(self, reg, val: 'Int32', mark_set=True) -> bool:
"""
Set a register content to val
:param reg: The register to set
@ -94,9 +93,15 @@ class Registers:
:param mark_set: If True, marks this register as "last accessed" (only used internally)
:return: If the operation was successful
"""
from .types import Int32
# remove after refactoring is complete
if not isinstance(val, Int32):
raise RuntimeError("Setting register to non-Int32 value! Please refactor your code!")
if reg == 'zero':
return False
#if reg not in Registers.all_registers():
# if reg not in Registers.all_registers():
# raise InvalidRegisterException(reg)
# replace fp register with s1, as these are the same register
if reg == 'fp':
@ -104,17 +109,17 @@ class Registers:
if mark_set:
self.last_set = reg
# check 32 bit signed bounds
self.vals[reg] = bind_twos_complement(val)
self.vals[reg] = val.unsigned()
return True
def get(self, reg, mark_read=True):
def get(self, reg, mark_read=True) -> 'Int32':
"""
Retuns the contents of register reg
:param reg: The register name
:param mark_read: If the register should be markes as "last read" (only used internally)
:return: The contents of register reg
"""
#if reg not in Registers.all_registers():
# if reg not in Registers.all_registers():
# raise InvalidRegisterException(reg)
if reg == 'fp':
reg = 's0'

View File

@ -4,23 +4,19 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import sys
from dataclasses import dataclass
from typing import Dict, IO
import sys
from .helpers import *
import riscemu
import typing
if typing.TYPE_CHECKING:
from . import CPU
from riscemu.CPU import UserModeCPU
SYSCALLS = {
63: 'read',
64: 'write',
93: 'exit',
63: 'read',
64: 'write',
93: 'exit',
1024: 'open',
1025: 'close',
}
@ -35,6 +31,7 @@ OPEN_MODES = {
}
"""All available file open modes"""
@dataclass(frozen=True)
class Syscall:
"""
@ -42,7 +39,7 @@ class Syscall:
"""
id: int
"""The syscall number (e.g. 64 - write)"""
cpu: 'riscemu.CPU'
cpu: 'UserModeCPU'
"""The CPU object that created the syscall"""
@property
@ -55,7 +52,7 @@ class Syscall:
)
def ret(self, code):
self.cpu.regs.set('a0', code)
self.cpu.regs.set('a0', Int32(code))
def get_syscall_symbols():
@ -94,9 +91,9 @@ class SyscallInterface:
read syscall (63): read from file no a0, into addr a1, at most a2 bytes
on return a0 will be the number of read bytes or -1 if an error occured
"""
fileno = scall.cpu.regs.get('a0')
addr = scall.cpu.regs.get('a1')
size = scall.cpu.regs.get('a2')
fileno = scall.cpu.regs.get('a0').unsigned_value
addr = scall.cpu.regs.get('a1').unsigned_value
size = scall.cpu.regs.get('a2').unsigned_value
if fileno not in self.open_files:
scall.cpu.regs.set('a0', -1)
return
@ -116,9 +113,9 @@ class SyscallInterface:
write syscall (64): write a2 bytes from addr a1 into fileno a0
on return a0 will hold the number of bytes written or -1 if an error occured
"""
fileno = scall.cpu.regs.get('a0')
addr = scall.cpu.regs.get('a1')
size = scall.cpu.regs.get('a2')
fileno = scall.cpu.regs.get('a0').unsigned_value
addr = scall.cpu.regs.get('a1').unsigned_value
size = scall.cpu.regs.get('a2').unsigned_value
if fileno not in self.open_files:
return scall.ret(-1)
@ -145,13 +142,14 @@ class SyscallInterface:
Requires running with flag scall-fs
"""
if not scall.cpu.conf.scall_fs:
# FIXME: this should be toggleable in a global setting or somethign
if True:
print(FMT_SYSCALL + '[Syscall] open: opening files not supported without scall-fs flag!' + FMT_NONE)
return scall.ret(-1)
mode = scall.cpu.regs.get('a0')
addr = scall.cpu.regs.get('a1')
size = scall.cpu.regs.get('a2')
mode = scall.cpu.regs.get('a0').unsigned_value
addr = scall.cpu.regs.get('a1').unsigned_value
size = scall.cpu.regs.get('a2').unsigned_value
mode_st = OPEN_MODES.get(mode, )
if mode_st == -1:
@ -178,7 +176,7 @@ class SyscallInterface:
return -1 if an error was encountered, otherwise returns 0
"""
fileno = scall.cpu.regs.get('a0')
fileno = scall.cpu.regs.get('a0').unsigned_value
if fileno not in self.open_files:
print(FMT_SYSCALL + '[Syscall] close: unknown fileno {}!'.format(fileno) + FMT_NONE)
return scall.ret(-1)
@ -192,11 +190,11 @@ class SyscallInterface:
"""
Exit syscall. Exits the system with status code a0
"""
scall.cpu.exit = True
scall.cpu.exit_code = scall.cpu.regs.get('a0')
scall.cpu.halted = True
scall.cpu.exit_code = scall.cpu.regs.get('a0').value
def __repr__(self):
return "{}(\n\tfiles={}\n)".format(
self.__class__.__name__,
self.open_files
)
)

135
riscemu/tokenizer.py Normal file
View File

@ -0,0 +1,135 @@
"""
RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import re
from dataclasses import dataclass
from enum import Enum, auto
from typing import List, Iterable
from riscemu.decoder import RISCV_REGS
from riscemu.types.exceptions import ParseException
# every one of these sequences starts a comment that runs until the end of the line
LINE_COMMENT_STARTERS = ('#', ';', '//')
WHITESPACE_PATTERN = re.compile(r'\s+')
# Matches "offset(register)" arguments: group 1 is the offset (hex, decimal, binary or a
# symbol name), group 2 the register name.
# The character classes are spelled out as A-F/a-f and A-Z/a-z: ranges such as A-f or A-z
# also cover the punctuation characters that sit between the upper and lower case letters.
MEMORY_ADDRESS_PATTERN = re.compile(
    r'^(0[xX][A-Fa-f0-9]+|\d+|0b[0-1]+|[A-Za-z0-9_-]+)\(([A-Za-z]+[0-9]{0,2})\)$'
)
REGISTER_NAMES = RISCV_REGS
class TokenType(Enum):
    """The kinds of tokens produced by the tokenizer."""
    # values are numbered consecutively, starting at one
    COMMA = 1
    ARGUMENT = 2
    PSEUDO_OP = 3
    INSTRUCTION_NAME = 4
    NEWLINE = 5
    LABEL = 6
@dataclass(frozen=True)
class Token:
    """An immutable token: its kind plus the text it was created from."""
    type: TokenType
    value: str

    def __str__(self):
        """
        Short display form: a comma and an escaped newline for the separator tokens,
        otherwise the first three letters of the type name followed by the value.
        """
        kind = self.type
        if kind == TokenType.COMMA:
            return ', '
        if kind == TokenType.NEWLINE:
            return '\\n'
        abbreviation = kind.name[0:3]
        return '{}({})'.format(abbreviation, self.value)
# shared token instance, emitted by tokenize() after every tokenized line
NEWLINE = Token(TokenType.NEWLINE, '\n')
# shared token instance, emitted for every comma that separates arguments
COMMA = Token(TokenType.COMMA, ',')
def tokenize(input: Iterable[str]) -> Iterable[Token]:
    """
    Turn lines of assembly source into a stream of tokens.

    Comments are removed, lines that are empty afterwards are skipped, and every
    remaining line is followed by a NEWLINE token.

    :param input: the source, one line per item
    :return: an iterable over the tokens of all lines
    """
    for line in input:
        # NOTE: a comment starter inside a quoted string also truncates the line
        for line_comment_start in LINE_COMMENT_STARTERS:
            if line_comment_start in line:
                line = line[:line.index(line_comment_start)]
        # str.strip returns a new string, so the result has to be assigned; otherwise
        # lines consisting only of whitespace are not recognized as empty
        line = line.strip(' \t\n')
        if not line:
            continue

        parts = [part for part in split_whitespace_respecting_quotes(line) if part]

        yield from parse_line(parts)
        yield NEWLINE
def parse_line(parts: List[str]) -> Iterable[Token]:
    """
    Tokenize the whitespace-separated parts of a single line.

    Any number of leading labels (parts ending in ':') is emitted first. The next
    part becomes a pseudo-op (if it starts with '.') or an instruction name, and all
    remaining parts are treated as arguments.
    """
    position = 0
    # leading labels; a part that starts with '.' is a pseudo-op even if it ends in ':'
    while position < len(parts):
        candidate = parts[position]
        if candidate[0] == '.' or candidate[-1] != ':':
            break
        yield Token(TokenType.LABEL, candidate)
        position += 1

    if position == len(parts):
        # nothing (left) on this line
        return

    head = parts[position]
    head_type = TokenType.PSEUDO_OP if head[0] == '.' else TokenType.INSTRUCTION_NAME
    yield Token(head_type, head)

    for argument in parts[position + 1:]:
        if argument == ',':
            yield COMMA
        else:
            yield from parse_arg(argument)
def parse_arg(arg: str) -> Iterable[Token]:
    """
    Tokenize a single argument, which may carry a trailing comma.

    An argument of the form ``offset(register)`` is split into two ARGUMENT tokens,
    the (lower-cased) register first and the offset second.

    :raises ParseException: if the register of an ``offset(register)`` argument is unknown
    """
    has_trailing_comma = arg[-1] == ','
    if has_trailing_comma:
        arg = arg[:-1]

    memory_match = MEMORY_ADDRESS_PATTERN.match(arg)
    if not memory_match:
        yield Token(TokenType.ARGUMENT, arg)
    else:
        offset_text = memory_match.group(1)
        register = memory_match.group(2).lower()
        if register not in RISCV_REGS:
            raise ParseException(f'"{register}" is not a valid register!')
        yield Token(TokenType.ARGUMENT, register)
        yield Token(TokenType.ARGUMENT, offset_text)

    if has_trailing_comma:
        yield COMMA
def print_tokens(tokens: Iterable[Token]):
    """Print the tokens, starting a new output line after every NEWLINE token."""
    for current in tokens:
        terminator = '\n' if current == NEWLINE else ''
        print(current, end=terminator)
    # make sure everything written so far is flushed
    print("", flush=True, end="")
def split_whitespace_respecting_quotes(line: str) -> Iterable[str]:
    """
    Split a line at spaces, tabs and newlines, keeping quoted sections together.

    Text between a pair of matching single or double quotes is yielded as one part,
    without the quotes (an empty quoted section yields an empty string). A quote also
    ends the part that precedes it.
    """
    open_quote = ""
    collected = []
    for char in line:
        if open_quote:
            if char == open_quote:
                # closing quote: emit the quoted text, even if it is empty
                yield "".join(collected)
                collected = []
                open_quote = ""
            else:
                collected.append(char)
        elif char in "\"'":
            # opening quote: finish whatever was collected before it
            if collected:
                yield "".join(collected)
            collected = []
            open_quote = char
        elif char in ' \t\n':
            if collected:
                yield "".join(collected)
                collected = []
        else:
            collected.append(char)

    if collected:
        yield "".join(collected)

29
riscemu/types/__init__.py Normal file
View File

@ -0,0 +1,29 @@
from typing import Dict
import re
# define some base type aliases so we can keep track of absolute and relative addresses
T_RelativeAddress = int
T_AbsoluteAddress = int
# parser options are just dictionaries with arbitrary values
T_ParserOpts = Dict[str, any]
NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
# base classes
from .flags import MemoryFlags
from .int32 import UInt32, Int32
from .instruction import Instruction
from .instruction_context import InstructionContext
from .memory_section import MemorySection
from .program import Program
from .program_loader import ProgramLoader
from .cpu import CPU
from .simple_instruction import SimpleInstruction
from .instruction_memory_section import InstructionMemorySection
from .binary_data_memory_section import BinaryDataMemorySection
# exceptions
from .exceptions import ParseException, NumberFormatException, MemoryAccessException, OutOfMemoryException, \
LinkerException, LaunchDebuggerException, RiscemuBaseException, InvalidRegisterException, \
InvalidAllocationException, InvalidSyscallException, UnimplementedInstruction

View File

@ -0,0 +1,29 @@
from . import MemorySection, InstructionContext, MemoryFlags, T_RelativeAddress, Instruction
from ..types.exceptions import MemoryAccessException
class BinaryDataMemorySection(MemorySection):
    """
    A memory section backed by a bytearray.

    It supports raw reads and writes; fetching instructions from it is an error.
    """

    def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: str, base: int = 0,
                 flags: MemoryFlags = None):
        """
        :param data: the backing storage; its length determines the section size
        :param name: the section name
        :param context: the instruction context this section belongs to
        :param owner: the name of the owner of this section
        :param base: the base address of the section
        :param flags: the memory flags, defaults to ``MemoryFlags(False, False)``
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(data)
        self.flags = flags if flags is not None else MemoryFlags(False, False)
        self.data = data
        self.owner = owner

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Read ``size`` bytes starting ``offset`` bytes into the section.

        :raises MemoryAccessException: if the range is not fully inside the section
        """
        # a negative offset would silently address bytes relative to the end of the data
        if offset < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read')
        return self.data[offset:offset + size]

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Write the first ``size`` bytes of ``data`` starting ``offset`` bytes into the section.

        :raises MemoryAccessException: if the range is not fully inside the section, or if
            ``data`` holds fewer than ``size`` bytes
        """
        # a negative offset would silently address bytes relative to the end of the data
        if offset < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write')
        if len(data[0:size]) != size:
            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
        self.data[offset:offset + size] = data[0:size]

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Instruction fetches are not supported on this kind of section.

        :raises MemoryAccessException: always
        """
        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
                                    offset, 4, 'instruction fetch')

107
riscemu/types/cpu.py Normal file
View File

@ -0,0 +1,107 @@
import typing
from abc import ABC, abstractmethod
from typing import List, Type, Callable, Set, Dict
from ..registers import Registers
from ..config import RunConfig
from ..colors import FMT_RED, FMT_NONE
from . import T_AbsoluteAddress, Instruction, Program, ProgramLoader
class CPU(ABC):
    """
    Abstract base class of all CPU implementations.

    It owns the registers, holds a reference to the MMU and keeps a table that maps
    instruction names to the callables implementing them. Subclasses implement
    :meth:`step`, :meth:`run` and :meth:`get_loaders`.
    """

    # static cpu configuration
    INS_XLEN: int = 4

    # housekeeping variables
    regs: Registers
    mmu: 'MMU'
    pc: T_AbsoluteAddress
    cycle: int
    halted: bool

    # debugging context
    debugger_active: bool

    # instruction information
    instructions: Dict[str, Callable[[Instruction], None]]
    instruction_sets: Set['InstructionSet']

    # configuration
    conf: RunConfig

    def __init__(self, mmu: 'MMU', instruction_sets: List[Type['InstructionSet']], conf: RunConfig):
        """
        :param mmu: the memory management unit this CPU works with
        :param instruction_sets: instruction set classes; each one is instantiated with this CPU
        :param conf: the run configuration
        """
        self.mmu = mmu
        self.regs = Registers()
        self.conf = conf

        self.instruction_sets = set()
        self.instructions = dict()

        for instruction_set_class in instruction_sets:
            loaded_set = instruction_set_class(self)
            self.instructions.update(loaded_set.load())
            self.instruction_sets.add(loaded_set)

        self.halted = False
        self.cycle = 0
        self.pc = 0
        self.debugger_active = False

    def run_instruction(self, ins: Instruction):
        """
        Execute a single instruction

        :param ins: The instruction to execute
        :raises RuntimeError: if no handler is registered for the instruction name
        """
        if ins.name not in self.instructions:
            # this should never be reached, as unknown instructions are imparseable
            raise RuntimeError("Unknown instruction: {}".format(ins))
        self.instructions[ins.name](ins)

    def load_program(self, program: Program):
        """Hand the program over to the MMU for loading."""
        self.mmu.load_program(program)

    def __repr__(self):
        """
        Returns a representation of the CPU and some of its state.
        """
        instruction_set_names = " ".join(s.name for s in self.instruction_sets)
        return "{}(pc=0x{:08X}, cycle={}, halted={} instructions={})".format(
            self.__class__.__name__,
            self.pc,
            self.cycle,
            self.halted,
            instruction_set_names
        )

    @abstractmethod
    def step(self, verbose=False):
        """Advance the CPU by one step; implemented by subclasses."""
        pass

    @abstractmethod
    def run(self, verbose=False):
        """Run the CPU; implemented by subclasses."""
        pass

    def launch(self, program: Program, verbose: bool = False):
        """
        Set the program counter to the entrypoint of ``program`` and run.

        Programs that are not known to the MMU are rejected with a printed error.
        """
        if program in self.mmu.programs:
            self.pc = program.entrypoint
            self.run(verbose)
        else:
            print(FMT_RED + '[CPU] Cannot launch program that\'s not loaded!' + FMT_NONE)

    @classmethod
    @abstractmethod
    def get_loaders(cls) -> typing.Iterable[Type[ProgramLoader]]:
        """Return the program loader classes of this CPU; implemented by subclasses."""
        pass

    def get_best_loader_for(self, file_name: str) -> Type[ProgramLoader]:
        """Return the loader whose ``can_parse`` result for ``file_name`` is the highest."""
        def score(loader):
            return loader.can_parse(file_name)

        return max(self.get_loaders(), key=score)

    @property
    def sections(self):
        """The sections known to the MMU."""
        return self.mmu.sections

    @property
    def programs(self):
        """The programs known to the MMU."""
        return self.mmu.programs

View File

@ -4,13 +4,12 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
from abc import abstractmethod
from ..colors import *
import typing
from abc import abstractmethod
from .colors import *
if typing.TYPE_CHECKING:
from .Executable import LoadedInstruction
from . import Instruction
class RiscemuBaseException(BaseException):
@ -18,12 +17,15 @@ class RiscemuBaseException(BaseException):
def message(self):
pass
def print_stacktrace(self):
import traceback
traceback.print_exception(type(self), self, self.__traceback__)
# Parsing exceptions:
class ParseException(RiscemuBaseException):
def __init__(self, msg, data=None):
super().__init__()
super().__init__(msg, data)
self.msg = msg
self.data = data
@ -116,13 +118,15 @@ class InvalidAllocationException(RiscemuBaseException):
class UnimplementedInstruction(RiscemuBaseException):
def __init__(self, ins: 'LoadedInstruction'):
def __init__(self, ins: 'Instruction', context = None):
self.ins = ins
self.context = context
def message(self):
return FMT_CPU + "{}({})".format(
return FMT_CPU + "{}({}{})".format(
self.__class__.__name__,
repr(self.ins)
repr(self.ins),
', context={}'.format(self.context) if self.context is not None else ''
) + FMT_NONE

13
riscemu/types/flags.py Normal file
View File

@ -0,0 +1,13 @@
from dataclasses import dataclass
@dataclass(frozen=True)
class MemoryFlags:
    """Immutable access flags of a memory section."""
    read_only: bool
    executable: bool

    def __repr__(self):
        """Render the flags as three characters: 'r', then 'w' or '-', then 'x' or '-'."""
        write_marker = '-' if self.read_only else 'w'
        execute_marker = 'x' if self.executable else '-'
        return "r{}{}".format(write_marker, execute_marker)

View File

@ -0,0 +1,31 @@
from abc import ABC, abstractmethod
from typing import Tuple
class Instruction(ABC):
    """
    Abstract base class of all instructions: a name plus a tuple of arguments.

    Subclasses define how the arguments are interpreted.
    """
    name: str
    args: tuple

    @abstractmethod
    def get_imm(self, num: int) -> int:
        """
        parse and get immediate argument
        """
        pass

    @abstractmethod
    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        parse and get an argument imm(reg)
        """
        pass

    @abstractmethod
    def get_reg(self, num: int) -> str:
        """
        parse and get an register argument
        """
        pass

    def __repr__(self):
        """Return the name followed by the comma separated arguments."""
        # arguments are not necessarily strings (e.g. integer immediates), and
        # str.join only accepts strings
        return "{} {}".format(self.name, ", ".join(str(arg) for arg in self.args))

View File

@ -0,0 +1,53 @@
from collections import defaultdict
from typing import Dict, List, Optional
from .exceptions import ParseException
from ..types import T_AbsoluteAddress, T_RelativeAddress, NUMBER_SYMBOL_PATTERN
class InstructionContext:
    """
    Symbol information of an instruction block: its labels, its numbered labels and a
    reference to the global symbols.
    """

    base_address: T_AbsoluteAddress
    """
    The address where the instruction block is placed
    """

    labels: Dict[str, T_RelativeAddress]
    """
    This dictionary maps all labels to their relative position of the instruction block
    """

    numbered_labels: Dict[str, List[T_RelativeAddress]]
    """
    This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where
    the label was placed
    """

    global_symbol_dict: Dict[str, T_AbsoluteAddress]
    """
    A reference to the MMU's global symbol dictionary for access to global symbols
    """

    def __init__(self):
        self.base_address = 0
        self.labels = dict()
        self.numbered_labels = defaultdict(list)
        self.global_symbol_dict = dict()

    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_AbsoluteAddress]:
        """
        Look up the address of a symbol.

        Numbered references (digits followed by 'b' or 'f') resolve to the closest numbered
        label strictly before ('b') or strictly after (any other direction) ``address_at``,
        offset by the base address. All other symbols are looked up in the local labels
        first and in the global symbols second.

        :param symbol: the symbol name
        :param address_at: the position the reference is made from, required for numbered references
        :return: the address, or None if the symbol is unknown
        :raises ParseException: if a numbered reference is resolved without ``address_at``
        """
        if not NUMBER_SYMBOL_PATTERN.match(symbol):
            # local symbols take precedence over globals
            if symbol in self.labels:
                return self.labels.get(symbol, None)
            return self.global_symbol_dict.get(symbol, None)

        if address_at is None:
            raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))

        number, direction = symbol[:-1], symbol[-1]
        positions = self.numbered_labels.get(number, [])

        if direction == 'b':
            candidates = [position for position in positions if position < address_at]
            choose = max
        else:
            candidates = [position for position in positions if position > address_at]
            choose = min

        if not candidates:
            return None
        return choose(candidates) + self.base_address

View File

@ -0,0 +1,27 @@
from typing import List
from . import MemorySection, Instruction, InstructionContext, MemoryFlags, T_RelativeAddress
from .exceptions import MemoryAccessException
class InstructionMemorySection(MemorySection):
    """
    A memory section that holds a list of instructions.

    Every instruction occupies four bytes of address space. The section can only be
    accessed through instruction fetches, raw reads and writes are rejected.
    """

    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: str,
                 base: int = 0):
        """
        :param instructions: the instructions of this section
        :param name: the section name
        :param context: the instruction context this section belongs to
        :param owner: the name of the owner of this section
        :param base: the base address of the section
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(instructions) * 4
        self.flags = MemoryFlags(True, True)
        self.instructions = instructions
        self.owner = owner

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Raw reads are not supported.

        :raises MemoryAccessException: always
        """
        raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Raw writes are not supported.

        :raises MemoryAccessException: always
        """
        raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write')

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Return the instruction located ``offset`` bytes into the section.

        :raises MemoryAccessException: if ``offset`` is not a multiple of four or lies
            outside of the section
        """
        if offset % 4 != 0:
            raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')
        index = offset // 4
        # without this check a negative offset would wrap around to the end of the list and
        # an offset past the end would surface as a bare IndexError
        if not 0 <= index < len(self.instructions):
            raise MemoryAccessException("Out of bounds instruction fetch in {}".format(self),
                                        self.base + offset, 4, 'instruction fetch')
        return self.instructions[index]

273
riscemu/types/int32.py Normal file
View File

@ -0,0 +1,273 @@
from typing import Union
from ctypes import c_int32, c_uint32
class Int32:
    """
    This class implements 32bit signed integers (see :class:`UInt32` for unsigned integers)

    It implements basically all mathematical dunder magic methods (__add__, __sub__, etc.)

    You can use it just like you would any other integer, just be careful when passing it
    to functions which actually expect an integer and not a Int32.

    Results of arithmetic are wrapped in the class of the left operand again, so they
    are truncated to 32 bits by the underlying ctypes integer.
    """
    _type = c_int32
    __slots__ = ('_val',)

    def __init__(self, val: Union[int, c_int32, c_uint32, 'Int32', bytes, bytearray] = 0):
        """
        :param val: the initial value; bytes and bytearrays are read as little-endian
        :raises RuntimeError: if ``val`` has an unsupported type
        """
        if isinstance(val, (bytes, bytearray)):
            # only a full 4 byte word of a signed type carries a sign bit
            signed = len(val) == 4 and self._type == c_int32
            self._val = self.__class__._type(int.from_bytes(val, 'little', signed=signed))
        elif isinstance(val, self.__class__._type):
            self._val = val
        elif isinstance(val, (c_uint32, c_int32, Int32)):
            self._val = self.__class__._type(val.value)
        elif isinstance(val, int):
            self._val = self.__class__._type(val)
        else:
            raise RuntimeError(
                "Unknonw {} input type: {} ({})".format(self.__class__.__name__, type(val), val)
            )

    @staticmethod
    def _raw(other):
        """Unwrap an Int32 (or subclass) operand to a plain int, pass anything else through."""
        return other.value if isinstance(other, Int32) else other

    def __add__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value + self._raw(other))

    def __sub__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value - self._raw(other))

    def __mul__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value * self._raw(other))

    def __truediv__(self, other):
        # there are no fractions, true division is floor division
        return self // other

    def __floordiv__(self, other):
        return self.__class__(self.value // self._raw(other))

    def __mod__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value % self._raw(other))

    def __and__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value & self._raw(other))

    def __or__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value | self._raw(other))

    def __xor__(self, other: Union['Int32', int]):
        return self.__class__(self._val.value ^ self._raw(other))

    def __lshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value << self._raw(other))

    def __rshift__(self, other: Union['Int32', int]):
        return self.__class__(self.value >> self._raw(other))

    def __eq__(self, other: Union['Int32', int]):
        return self.value == self._raw(other)

    def __neg__(self):
        return self.__class__(-self._val.value)

    def __abs__(self):
        return self.__class__(abs(self.value))

    def __bytes__(self):
        # bytes(obj) requires __bytes__ to return an actual bytes object, a bytearray
        # makes it raise a TypeError
        return bytes(self.to_bytes(4))

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, self.value)

    def __str__(self):
        return str(self.value)

    def __format__(self, format_spec):
        return self.value.__format__(format_spec)

    def __hash__(self):
        return hash(self.value)

    def __gt__(self, other):
        return self.value > self._raw(other)

    def __lt__(self, other):
        return self.value < self._raw(other)

    def __le__(self, other):
        return self.value <= self._raw(other)

    def __ge__(self, other):
        return self.value >= self._raw(other)

    def __bool__(self):
        return bool(self.value)

    def __cmp__(self, other):
        """
        Three-way comparison: negative, zero or positive if self is smaller than,
        equal to or greater than ``other``.
        """
        # Python 3 integers have no __cmp__ to delegate to, so compute the result directly
        other = self._raw(other)
        return (self.value > other) - (self.value < other)

    # right handed binary operators

    def __radd__(self, other):
        return self + other

    def __rsub__(self, other):
        return self.__class__(other) - self

    def __rmul__(self, other):
        return self * other

    def __rtruediv__(self, other):
        return self.__class__(other) // self

    def __rfloordiv__(self, other):
        return self.__class__(other) // self

    def __rmod__(self, other):
        return self.__class__(other) % self

    def __rand__(self, other):
        return self.__class__(other) & self

    def __ror__(self, other):
        return self.__class__(other) | self

    def __rxor__(self, other):
        return self.__class__(other) ^ self

    @property
    def value(self) -> int:
        """
        The value represented by this Integer

        :return:
        """
        return self._val.value

    def unsigned(self) -> 'UInt32':
        """
        Convert to an unsigned representation. See :class:Uint32

        :return:
        """
        return UInt32(self)

    def to_bytes(self, bytes: int = 4) -> bytearray:
        """
        Convert to a bytearray of length :param:bytes

        :param bytes: The length of the bytearray
        :return: A little-endian representation of the contained integer
        """
        return bytearray(self.unsigned_value.to_bytes(4, 'little'))[0:bytes]

    def signed(self) -> 'Int32':
        """
        Convert to a signed representation. See :class:Int32

        :return:
        """
        if self.__class__ == Int32:
            return self
        return Int32(self)

    @property
    def unsigned_value(self):
        """
        Return the value interpreted as an unsigned integer

        :return:
        """
        return c_uint32(self.value).value

    def shift_right_logical(self, ammount: Union['Int32', int]) -> 'Int32':
        """
        This function implements logical right shifts, meaning that the sign bit is shifted as well.

        This is equivalent to (self.value % 0x100000000) >> ammount

        :param ammount: Number of positions to shift
        :return: A new Int32 object representing the shifted value (keeps the signed-ness of the source)
        """
        return self.__class__((self.value % 0x100000000) >> self._raw(ammount))

    def __int__(self):
        return self.value

    def __hex__(self):
        return hex(self.value)

    @classmethod
    def sign_extend(cls, data: Union[bytes, bytearray, int], bits: int):
        """
        Create an instance of Int32 by sign extending :param:bits bits from :param:data
        to 32 bits

        :param data: The source data
        :param bits: The number of bits in the source data
        :return: An instance of Int32, holding the sign-extended value
        """
        if isinstance(data, (bytes, bytearray)):
            data = int.from_bytes(data, 'little')
        sign = data >> (bits - 1)
        if sign > 1:
            # data has bits set above the given width
            print("overflow in Int32.sext!")
        if sign:
            # keep the value bits and subtract the weight of the sign bit
            data = (data & (2 ** (bits - 1) - 1)) - 2 ** (bits - 1)
        return cls(data)
class UInt32(Int32):
    """
    The unsigned counterpart of :class:Int32, stored in a 32 bit unsigned integer.
    """
    _type = c_uint32

    def unsigned(self) -> 'UInt32':
        """
        This object is already unsigned, so it is returned unchanged.

        :return: this very instance
        """
        return self

    @property
    def unsigned_value(self) -> int:
        """
        The contained value, which is unsigned already.
        """
        raw = self._val
        return raw.value

    def shift_right_logical(self, ammount: Union['Int32', int]) -> 'UInt32':
        """
        see :meth:`Int32.shift_right_logical <Int32.shift_right_logical>`

        For this class this is the plain right shift operator.

        :param ammount: Number of positions to shift
        :return: A new object representing the shifted value
        """
        shifted = self >> ammount
        return shifted

View File

@ -0,0 +1,88 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional
from ..colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE
from ..helpers import format_bytes
from . import MemoryFlags, T_AbsoluteAddress, InstructionContext, T_RelativeAddress, Instruction
@dataclass
class MemorySection(ABC):
    """
    Abstract base class of all memory sections.

    A section has a name, access flags, a size, a base address, an owner and an
    instruction context. Subclasses implement raw reads and writes as well as
    instruction fetches; offsets passed to these methods are relative to the section.
    """
    name: str
    flags: MemoryFlags
    size: int
    base: T_AbsoluteAddress
    owner: str
    context: InstructionContext

    @property
    def end(self):
        """The first address after this section (base + size)."""
        return self.base + self.size

    @abstractmethod
    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """Read ``size`` bytes at ``offset``; implemented by subclasses."""
        pass

    @abstractmethod
    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """Write ``size`` bytes of ``data`` at ``offset``; implemented by subclasses."""
        pass

    @abstractmethod
    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """Fetch the instruction at ``offset``; implemented by subclasses."""
        pass

    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress] = None, fmt: str = 'hex',
             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
        """
        Print the contents of this section.

        Executable sections are printed as one instruction per line, all other sections as
        rows of formatted bytes.

        :param start: the section-relative offset to start at
        :param end: the section-relative offset to stop at. If omitted, a window of about
            ``rows`` rows around ``start`` is shown and ``start`` is highlighted
        :param fmt: the format passed on to format_bytes
        :param bytes_per_row: the number of bytes per row (always 4 for executable sections)
        :param rows: the number of rows of the window when ``end`` is omitted
        :param group: the group size passed on to format_bytes
        """
        # instructions are printed one per row
        if self.flags.executable:
            bytes_per_row = 4
        highlight = None
        if end is None:
            # no explicit end: show half of the rows before and half of them after start
            end = min(start + (bytes_per_row * (rows // 2)), self.size - 1)
            highlight = start
            start = max(0, start - (bytes_per_row * (rows // 2)))
        if self.flags.executable:
            print(FMT_MEM + "{}, viewing {} instructions:".format(
                self, (end - start) // 4
            ) + FMT_NONE)
            for addr in range(start, end, 4):
                if addr == highlight:
                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
                print("0x{:04x}: {}{}".format(
                    self.base + addr, self.read_ins(addr), FMT_NONE
                ))
        else:
            print(FMT_MEM + "{}, viewing {} bytes:".format(
                self, (end - start)
            ) + FMT_NONE)
            # end, rounded down to a multiple of bytes_per_row
            aligned_end = end - (end % bytes_per_row) if end % bytes_per_row != 0 else end
            for addr in range(start, aligned_end, bytes_per_row):
                # index of the group to highlight within this row, -1 without a highlight
                hi_ind = (highlight - addr) // group if highlight is not None else -1
                print("0x{:04x}: {}{}".format(
                    self.base + addr, format_bytes(self.read(addr, bytes_per_row), fmt, group, hi_ind), FMT_NONE
                ))
            # print the bytes left over after the last full row
            if aligned_end != end:
                hi_ind = (highlight - aligned_end) // group if highlight is not None else -1
                print("0x{:04x}: {}{}".format(
                    self.base + aligned_end, format_bytes(
                        self.read(aligned_end, end % bytes_per_row), fmt, group, hi_ind
                    ), FMT_NONE
                ))

    def dump_all(self, *args, **kwargs):
        """Dump the whole section; additional arguments are passed on to :meth:`dump`."""
        self.dump(0, self.size, *args, **kwargs)

    def __repr__(self):
        """Return the class name, section name, base address, size, flags and owner."""
        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
            self.__class__.__name__,
            self.name,
            self.base,
            self.size,
            self.flags,
            self.owner
        )

104
riscemu/types/program.py Normal file
View File

@ -0,0 +1,104 @@
from typing import List, Optional, Set
from ..colors import FMT_RED, FMT_BOLD, FMT_NONE, FMT_MEM
from ..helpers import get_section_base_name
from . import InstructionContext, T_AbsoluteAddress, MemorySection
class Program:
"""
This represents a collection of sections which together form an executable program
When you want to create a program which can be located anywhere in memory, set base to None,
this signals the other components, that this is relocatable. Set the base of each section to
the offset in the program, and everything will be taken care of for you.
"""
name: str
context: InstructionContext
global_labels: Set[str]
relative_labels: Set[str]
sections: List[MemorySection]
base: Optional[T_AbsoluteAddress]
is_loaded: bool
@property
def size(self):
    """
    The size of the program: the end of the last section, measured from the program
    base (or from zero if the program has no base). Zero if there are no sections.
    """
    if not self.sections:
        return 0
    last_section = self.sections[-1]
    if self.base is None:
        return last_section.base + last_section.size
    return (last_section.base - self.base) + last_section.size
def __init__(self, name: str, base: Optional[int] = None):
    """
    Create an empty program that is not loaded yet.

    :param name: the program name
    :param base: the base address, or None
    """
    self.name = name
    self.base = base
    self.is_loaded = False
    self.sections = []
    self.global_labels = set()
    self.relative_labels = set()
    self.context = InstructionContext()
def add_section(self, sec: MemorySection):
# print a warning when a section is located before the programs base
if self.base is not None:
if sec.base < self.base:
print(
FMT_RED + FMT_BOLD + "WARNING: memory section {} in {} is placed before program base (0x{:x})".format(
sec, self.name, self.base
) + FMT_NONE)
self.sections.append(sec)
# keep section list ordered
self.sections.sort(key=lambda section: section.base)
def __repr__(self):
return "{}(name={},sections={},base={})".format(
self.__class__.__name__, self.name, self.global_labels,
[s.name for s in self.sections], self.base
)
@property
def entrypoint(self):
if '_start' in self.context.labels:
return self.context.labels.get('_start')
if 'main' in self.context.labels:
return self.context.labels.get('main')
for sec in self.sections:
if get_section_base_name(sec.name) == '.text' and sec.flags.executable:
return sec.base
def loaded_trigger(self, at_addr: T_AbsoluteAddress):
"""
This trigger is called when the binary is loaded and its final address in memory is determined
This will do a small sanity check to prevent programs loading twice, or at addresses they don't
expect to be loaded.
Then it will finalize all relative symbols defined in it to point to the correct addresses.
:param at_addr: the address where the program will be located
"""
if self.is_loaded:
if at_addr != self.base:
raise RuntimeError("Program loaded twice at different addresses! This will probably break things!")
return
if self.base is not None and self.base != at_addr:
print(FMT_MEM + 'WARNING: Program loaded at different address then expected! (loaded at {}, '
'but expects to be loaded at {})'.format(at_addr, self.base) + FMT_NONE)
# check if we are relocating
if self.base != at_addr:
offset = at_addr if self.base is None else at_addr - self.base
# move all sections by the offset
for sec in self.sections:
sec.base += offset
# move all relative symbols by the offset
for name in self.relative_labels:
self.context.labels[name] += offset
self.base = at_addr
self.context.base_address = at_addr

View File

@ -0,0 +1,58 @@
import os
from abc import abstractmethod, ABC
from typing import Union, Iterator, List, Tuple

from . import T_ParserOpts, Program
class ProgramLoader(ABC):
    """
    A program loader is always specific to a given source file. It is a place to store all state
    concerning the parsing and loading of that specific source file, including options.
    """

    def __init__(self, source_path: str, options: 'T_ParserOpts'):
        """
        :param source_path: the path of the source file this loader is responsible for
        :param options: the parser options for this source file
        """
        self.source_path = source_path
        self.options = options
        # last path component of the source path
        self.filename = os.path.split(self.source_path)[-1]

    @classmethod
    @abstractmethod
    def can_parse(cls, source_path: str) -> float:
        """
        Return confidence that the file located at source_path
        should be parsed and loaded by this loader

        :param source_path: the path of the source file
        :return: the confidence that this file belongs to this parser
        """
        pass

    @classmethod
    @abstractmethod
    def get_options(cls, argv: List[str]) -> Tuple[List[str], 'T_ParserOpts']:
        """
        parse command line args into an options dictionary

        :param argv: the command line args list
        :return: all remaining command line args and the parser options object
        """
        pass

    @classmethod
    def instantiate(cls, source_path: str, options: 'T_ParserOpts') -> 'ProgramLoader':
        """
        Instantiate a loader for the given source file with the required arguments

        :param source_path: the path to the source file
        :param options: the parsed options (guaranteed to come from this classes get_options method.
        :return: An instance of a ProgramLoader for the specified source
        """
        return cls(source_path, options)

    @abstractmethod
    def parse(self) -> Union['Program', Iterator['Program']]:
        """
        Parse the source file this loader was created for

        :return: the parsed program, or an iterator over programs if the source contains several
        """
        pass

View File

@ -0,0 +1,26 @@
from typing import Union, Tuple
from . import Instruction, T_RelativeAddress, InstructionContext
from ..helpers import parse_numeric_argument
class SimpleInstruction(Instruction):
    """
    An instruction consisting of a name and up to three textual arguments, which are
    interpreted as labels, numeric immediates or register names on access.
    """

    def __init__(self, name: str, args: Union[Tuple[()], Tuple[str], Tuple[str, str], Tuple[str, str, str]],
                 context: InstructionContext, addr: T_RelativeAddress):
        """
        :param name: the name of the instruction
        :param args: the arguments of the instruction, as strings
        :param context: the context used to resolve labels
        :param addr: the address of the instruction, passed along when resolving labels
        """
        self.context = context
        self.name = name
        self.args = args
        self.addr = addr

    def get_imm(self, num: int) -> int:
        """
        Interpret argument number num as an immediate value.

        :param num: the index of the argument
        :return: the resolved label if the context resolves the argument, the parsed number otherwise
        """
        label_value = self.context.resolve_label(self.args[num], self.addr)
        if label_value is not None:
            return label_value
        return parse_numeric_argument(self.args[num])

    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        Read a register at index num and the immediate at index num + 1.

        :param num: the index of the register argument
        :return: the immediate, followed by the register
        """
        immediate = self.get_imm(num + 1)
        register = self.get_reg(num)
        return immediate, register

    def get_reg(self, num: int) -> str:
        """
        :param num: the index of the argument
        :return: the argument at that index, unmodified
        """
        return self.args[num]

View File

@ -8,7 +8,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
setuptools.setup(
name="riscemu",
version=riscemu.__version__,
author="Anton Lydike",
author=riscemu.__author__,
author_email="pip@antonlydike.de",
description="RISC-V userspace and privileged emulator",
long_description=long_description,
@ -23,7 +23,7 @@ setuptools.setup(
"Operating System :: OS Independent",
],
package_dir={"": "."},
packages=["riscemu", "riscemu.decoder", "riscemu.instructions", "riscemu.IO", "riscemu.priv"],
packages=["riscemu", "riscemu.decoder", "riscemu.instructions", "riscemu.IO", "riscemu.priv", "riscemu.types"],
python_requires=">=3.6",
install_requires=[
"pyelftools~=0.27"

View File

@ -24,11 +24,11 @@ if os.getenv('READTHEDOCS', False) and not os.path.exists('riscemu.rst'):
# -- Project information -----------------------------------------------------
project = 'RiscEmu'
copyright = '2021, Anton Lydike'
copyright = '2022, Anton Lydike'
author = 'Anton Lydike'
# The full version, including alpha/beta/rc tags
release = '0.1.0'
release = '2.0.0a2'
# -- General configuration ---------------------------------------------------

View File

@ -0,0 +1,3 @@
from .test_tokenizer import *
from .test_helpers import *
from .test_integers import *

View File

View File

@ -0,0 +1,73 @@
import contextlib
import os
from abc import abstractmethod
from tempfile import NamedTemporaryFile
from typing import Optional, Union, Tuple
from unittest import TestCase
from riscemu import CPU, UserModeCPU, InstructionSetDict, RunConfig
from riscemu.types import Program
class EndToEndTest(TestCase):
    """
    Base class for tests which write a program to a temporary file, load it with the best
    matching loader of the CPU and run it.

    Subclasses provide the program through get_source and may override after_program_load
    and is_exception_expected.
    """

    def __init__(self, cpu: Optional[CPU] = None):
        """
        :param cpu: the CPU to run the program on. When omitted, a UserModeCPU with all
                    instruction sets from InstructionSetDict is created.
        """
        if isinstance(cpu, str):
            # unittest's loaders construct test cases as TestClass(methodName), so a string in
            # the first positional slot is the name of the test method and not a CPU
            super().__init__(cpu)
            cpu = None
        else:
            super().__init__()
        if cpu is None:
            cpu = UserModeCPU(InstructionSetDict.values(), RunConfig())
        self.cpu = cpu

    @abstractmethod
    def get_source(self) -> Tuple[str, Union[bytes, str, bytearray]]:
        """
        This method returns the source code of the program

        :return: the file name (used as suffix of the temporary file and as program name)
                 and the content of the source file
        """
        pass

    def test_run_program(self):
        """
        Writes the source to a temporary file, parses and loads it and launches the program.

        Exceptions raised while launching are swallowed if is_exception_expected accepts
        them, all others are re-raised.
        """
        with self.with_source_file() as (fname, orig_name):
            loader = self.cpu.get_best_loader_for(fname)
            # get_options returns (remaining args, options), only the options belong to the loader
            _, options = loader.get_options([])
            self.program = loader.instantiate(fname, options).parse()
            self._change_program_file_name(self.program, orig_name)
            self.cpu.load_program(self.program)
            self.after_program_load(self.program)
            if isinstance(self.cpu, UserModeCPU):
                self.cpu.setup_stack()
            try:
                self.cpu.launch(self.program)
            except Exception as ex:
                # previously the exception was re-raised even when it was expected
                if not self.is_exception_expected(ex):
                    raise

    @contextlib.contextmanager
    def with_source_file(self):
        """
        Context manager which writes the source to a temporary file and removes it on exit.

        :return: yields the path of the temporary file and the original file name
        """
        name, content = self.get_source()
        mode = 'w' if isinstance(content, str) else 'wb'
        f = NamedTemporaryFile(mode, suffix=name, delete=False)
        try:
            # closing the file flushes it, the file stays on disk because of delete=False
            with f:
                f.write(content)
            yield f.name, name
        finally:
            # also reached when writing fails, so the temporary file never leaks
            os.unlink(f.name)

    def after_program_load(self, program):
        """
        Hook called after the program was loaded into the CPU, does nothing by default.

        :param program: the loaded program
        """
        pass

    def is_exception_expected(self, ex: Exception) -> bool:
        """
        Decide whether an exception raised while launching the program is part of the test.

        :param ex: the raised exception
        :return: False by default, so every exception fails the test
        """
        return False

    def _change_program_file_name(self, program: Program, new_name: str):
        """
        Rename the program and make new_name the owner of all its sections.

        :param program: the program to rename
        :param new_name: the new name
        """
        program.name = new_name
        for sec in program.sections:
            sec.owner = new_name

View File

@ -3,28 +3,17 @@ from unittest import TestCase
from riscemu.helpers import *
class Test(TestCase):
def test_int_to_bytes(self):
self.assertEqual(int_to_bytes(-1), bytearray([0xff] * 4), "-1")
self.assertEqual(int_to_bytes(1), bytearray([0, 0, 0, 1]), "1")
self.assertEqual(int_to_bytes(1231132), bytearray(b'\x00\x12\xc9\x1c'), "random number")
self.assertEqual(int_to_bytes(-1231132), bytearray(b'\xff\xed6\xe4'), "random negative number")
class TestHelpers(TestCase):
def test_int_from_bytes(self):
self.assertEqual(bytearray([0xff] * 4), int_to_bytes(-1), "-1")
self.assertEqual(bytearray([0, 0, 0, 1]), int_to_bytes(1), "1")
self.assertEqual(bytearray(b'\x00\x12\xc9\x1c'), int_to_bytes(1231132), "random number")
self.assertEqual(bytearray(b'\xff\xed6\xe4'), int_to_bytes(-1231132), "random negative number")
def test_to_unsigned(self):
self.assertEqual(to_unsigned(-1), 0xFFFFFFFF)
self.assertEqual(to_unsigned(-100), 0xffffff9c)
self.assertEqual(to_unsigned(1), 1)
self.assertEqual(to_unsigned(0xffffffff), 0xffffffff)
self.assertEqual(to_unsigned(0xffed36e4), 0xffed36e4)
def test_to_signed(self):
self.assertEqual(to_signed(0xFFFFFFFF), -1)
self.assertEqual(to_signed(0xffed36e4), -1231132)
self.assertEqual(to_signed(0x0FFFFFFF), 0x0FFFFFFF)
def test_bind_twos_complement(self):
minval = -(1 << 31)
maxval = ((1 << 31)-1)
self.assertEqual(bind_twos_complement(minval), minval, "minval preserves")
self.assertEqual(bind_twos_complement(minval), minval, )
self.assertEqual(bind_twos_complement(maxval), maxval, "maxval preserves")
self.assertEqual(bind_twos_complement(minval - 1), maxval, "minval-1 wraps")
self.assertEqual(bind_twos_complement(maxval + 1), minval, "maxval+1 wraps")
self.assertEqual(bind_twos_complement(0), 0, "0 is 0")
self.assertEqual(bind_twos_complement(1), 1, "1 is 1")
self.assertEqual(bind_twos_complement(-1), -1, "-1 is -1")

19
test/test_integers.py Normal file
View File

@ -0,0 +1,19 @@
from unittest import TestCase
from riscemu.types import Int32, UInt32
class TestTokenizer(TestCase):
    """
    Tests for the logical right shift of Int32.

    NOTE(review): despite its name this class does not test the tokenizer. test/__init__.py
    star-imports both test_tokenizer and this module, so this class replaces the tokenizer's
    TestTokenizer in the package namespace - consider renaming it.
    """

    def test_logical_right_shift(self):
        """Shifting in zeros from the left, for a positive and a negative value."""
        positive = Int32(100)
        self.assertEqual(positive.shift_right_logical(0), positive)
        for amount, expected in ((10, 0), (1, 100 >> 1)):
            self.assertEqual(positive.shift_right_logical(amount), expected)

        negative = Int32(-100)
        self.assertEqual(negative.shift_right_logical(0), negative)
        for amount, expected in ((1, 2147483598), (10, 4194303), (31, 1), (32, 0)):
            self.assertEqual(negative.shift_right_logical(amount), expected)

75
test/test_isa.py Normal file
View File

@ -0,0 +1,75 @@
from riscemu.colors import FMT_ERROR, FMT_NONE, FMT_BOLD, FMT_GREEN
from riscemu.instructions import InstructionSet
from riscemu.types import Instruction, CPU
from riscemu.decoder import RISCV_REGS
FMT_SUCCESS = FMT_GREEN + FMT_BOLD
def assert_equals(ins: Instruction, cpu: CPU):
    """
    Compare argument 0 and argument 2 of the instruction after resolving both.

    :return: True if both resolved values are equal
    """
    left = get_arg_from_ins(ins, 0, cpu)
    right = get_arg_from_ins(ins, 2, cpu)
    return left == right
def assert_equals_mem(ins: Instruction, cpu: CPU):
    """
    Compare the integer read from memory at the address given by argument 0 with
    the resolved value of argument 2.

    :return: True if the integer in memory equals argument 2
    """
    address = get_arg_from_ins(ins, 0, cpu)
    expected = get_arg_from_ins(ins, 2, cpu)
    stored = cpu.mmu.read_int(address)
    return stored == expected
def assert_in(ins: Instruction, cpu: CPU):
    """
    Check whether the resolved argument 0 occurs among the resolved arguments 2 and up.

    :return: True if argument 0 is contained in the remaining values
    """
    needle = get_arg_from_ins(ins, 0, cpu)
    candidate_indices = range(2, len(ins.args))
    haystack = [get_arg_from_ins(ins, index, cpu) for index in candidate_indices]
    return needle in haystack
def _not(func):
    """
    Build the negated version of an assert operation.

    :param func: a function taking an instruction and a cpu
    :return: a function with the same arguments returning the negated result of func
    """
    def negated(ins: Instruction, cpu: CPU):
        outcome = func(ins, cpu)
        return not outcome

    return negated
def get_arg_from_ins(ins: Instruction, num: int, cpu: CPU):
    """
    Resolve argument number num of the instruction to a value.

    :return: the content of the register if the argument is listed in RISCV_REGS,
             the immediate value of the argument otherwise
    """
    argument = ins.args[num]
    if argument not in RISCV_REGS:
        return ins.get_imm(num)
    return cpu.regs.get(argument)
# Maps the operator of an assert statement (its second argument) to the function
# evaluating it. Each function takes the instruction and the cpu and returns a truth value.
assert_ops = {
    '==': assert_equals,
    '!=': _not(assert_equals),
    'in': assert_in,
    'not_in': _not(assert_in),
}
class TestIS(InstructionSet):
def __init__(self, cpu: 'CPU'):
    """
    Announce that the testing ISA is loaded and start out in the "not failed" state.

    :param cpu: the cpu, passed on to the InstructionSet constructor
    """
    notice = '[Test] loading testing ISA, this is only meant for running testcases and is not part of the RISC-V ISA!'
    print(notice)
    self.failed = False
    super().__init__(cpu)
def instruction_assert(self, ins: Instruction):
    """
    Evaluate an assert statement of the form `assert <arg>, <op>, <arg>...`.

    Statements with less than three arguments or an operator missing from assert_ops are
    reported and otherwise ignored. A failing assertion halts the cpu and marks this
    instruction set as failed.

    :param ins: the assert instruction
    """
    if len(ins.args) < 3:
        print(FMT_ERROR + '[Test] Unknown assert statement: {}'.format(ins) + FMT_NONE)
        return

    op = ins.args[1]
    if op not in assert_ops:
        print(FMT_ERROR + '[Test] Unknown operation statement: {} in {}'.format(op, ins) + FMT_NONE)
        return

    # every message ends with FMT_NONE, like the other coloured messages in this file,
    # so the colour does not bleed into later output
    if assert_ops[op](ins, self.cpu):
        print(FMT_SUCCESS + '[TestCase] 🟢 passed assertion {}'.format(ins) + FMT_NONE)
    else:
        print(FMT_ERROR + '[TestCase] 🔴 failed assertion {}'.format(ins) + FMT_NONE)
        self.cpu.halted = True
        self.failed = True
def instruction_fail(self, ins: Instruction):
    """
    Report that a fail instruction was reached, halt the cpu and mark this instruction set as failed.

    :param ins: the fail instruction
    """
    # FMT_NONE ends the message like the other coloured messages in this file
    print(FMT_ERROR + '[TestCase] 🔴 reached fail instruction! {}'.format(ins) + FMT_NONE)
    self.cpu.halted = True
    self.failed = True
def assert_mem(self, ins: Instruction):

126
test/test_tokenizer.py Normal file
View File

@ -0,0 +1,126 @@
from unittest import TestCase
from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA, \
split_whitespace_respecting_quotes
def ins(name: str) -> Token:
    """Shorthand for the INSTRUCTION_NAME token with the given value."""
    kind = TokenType.INSTRUCTION_NAME
    return Token(kind, name)
def arg(name: str) -> Token:
    """Shorthand for the ARGUMENT token with the given value."""
    kind = TokenType.ARGUMENT
    return Token(kind, name)
def op(name: str) -> Token:
    """Shorthand for the PSEUDO_OP token with the given value."""
    kind = TokenType.PSEUDO_OP
    return Token(kind, name)
def lbl(name: str) -> Token:
    """Shorthand for the LABEL token with the given value."""
    kind = TokenType.LABEL
    return Token(kind, name)
class TestTokenizer(TestCase):
    """Tests for tokenize and split_whitespace_respecting_quotes."""

    def _assert_split(self, text, expected):
        """Assert that splitting text yields exactly the parts listed in expected."""
        parts = list(split_whitespace_respecting_quotes(text))
        self.assertEqual(parts, expected)

    def test_instructions(self):
        """Every instruction line yields its name, its arguments separated by COMMA and a NEWLINE."""
        source_lines = [
            'li a0, 144',
            'divi a0, a0, 12',
            'xori a1, a0, 12'
        ]
        expected = [
            ins('li'), arg('a0'), COMMA, arg('144'), NEWLINE,
            ins('divi'), arg('a0'), COMMA, arg('a0'), COMMA, arg('12'), NEWLINE,
            ins('xori'), arg('a1'), COMMA, arg('a0'), COMMA, arg('12'), NEWLINE,
        ]
        actual = list(tokenize(source_lines))
        self.assertEqual(actual, expected)

    def test_comments(self):
        """A line starting with #, // or ; produces no tokens."""
        expected = [
            ins('li'), arg('a0'), COMMA, arg('144'), NEWLINE
        ]
        for marker in ('#', '//', ';'):
            source_lines = [
                marker + ' this is a comment',
                'li a0, 144'
            ]
            actual = list(tokenize(source_lines))
            self.assertEqual(actual, expected)

    def test_pseudo_ins(self):
        """Pseudo ops are tokenized as PSEUDO_OP followed by their arguments."""
        expected = [
            op('.section'), arg('.text'), NEWLINE,
            op('.type'), arg('init'), COMMA, arg('@function'), NEWLINE
        ]
        source_lines = [
            '.section .text',
            '.type init, @function'
        ]
        actual = list(tokenize(source_lines))
        self.assertEqual(actual, expected)

    def test_full_program(self):
        """A program mixing comments, a pseudo op, instructions and a label."""
        source = """
# a hashtag comment
; semicolon comment followed by an empty line
.section .text
// double slash comment
addi sp, sp, -32
sw s0, 0(ra)
section:
sub s0, s0, s0
"""
        expected = [
            op('.section'), arg('.text'), NEWLINE,
            ins('addi'), arg('sp'), COMMA, arg('sp'), COMMA, arg('-32'), NEWLINE,
            ins('sw'), arg('s0'), COMMA, arg('ra'), arg('0'), NEWLINE,
            lbl('section:'), NEWLINE,
            ins('sub'), arg('s0'), COMMA, arg('s0'), COMMA, arg('s0'), NEWLINE
        ]
        actual = list(tokenize(source.splitlines()))
        self.assertEqual(actual, expected)

    def test_split_whitespace_respecting_quotes_single(self):
        self._assert_split("test", ["test"])

    def test_split_whitespace_respecting_quotes_empty(self):
        self._assert_split("", [])

    def test_split_whitespace_respecting_quotes_two_parts(self):
        self._assert_split("test 123", ["test", "123"])

    def test_split_whitespace_respecting_quotes_whole_quoted(self):
        self._assert_split("'test 123'", ["test 123"])

    def test_split_whitespace_respecting_quotes_double_quotes(self):
        self._assert_split('"test 123"', ["test 123"])

    def test_split_whitespace_respecting_quotes_quoted_then_normal(self):
        self._assert_split('"test 123" abc', ["test 123", "abc"])

    def test_split_whitespace_respecting_quotes_quoted_sorrounded(self):
        self._assert_split('hello "test 123" abc', ["hello", "test 123", "abc"])

    def test_split_whitespace_respecting_quotes_weird_spaces(self):
        self._assert_split('hello "test 123"\tabc', ["hello", "test 123", "abc"])

    def test_split_whitespace_respecting_quotes_quotes_no_spaces(self):
        self._assert_split('hello"test 123"abc', ["hello", "test 123", "abc"])

View File

@ -0,0 +1,53 @@
from riscemu import AssemblyFileLoader
from riscemu.colors import *
FMT_SUCCESS = FMT_GREEN + FMT_BOLD
def run_test(path: str):
    """
    Run a single assembly testcase on a UserModeCPU which additionally has the TestIS
    instruction set loaded.

    :param path: the path of the assembly file
    :return: True if the cpu halted and the first TestIS instance found did not fail,
             False otherwise. Exceptions raised while parsing, loading or running
             are reported and re-raised.
    """
    from riscemu import CPU, UserModeCPU, RunConfig
    from riscemu.instructions import InstructionSetDict
    from test.test_isa import TestIS
    import os

    fname = os.path.basename(path)

    instruction_sets = [*InstructionSetDict.values(), TestIS]
    cpu = UserModeCPU(instruction_sets, RunConfig())

    try:
        program = AssemblyFileLoader(path, {}).parse()
        cpu.load_program(program)
        cpu.launch(program)
    except Exception as ex:
        print(FMT_ERROR + '[Test] 🔴 failed with exception "{}" ({})'.format(ex, fname) + FMT_NONE)
        raise ex

    if not cpu.halted:
        return False

    for isa in cpu.instruction_sets:
        if not isinstance(isa, TestIS):
            continue
        if not isa.failed:
            print(FMT_SUCCESS + '[Test] 🟢 successful {}'.format(fname) + FMT_NONE)
        return not isa.failed

    return False
if __name__ == '__main__':
    # Script entry point: runs every *.asm file located next to this file through run_test
    # and counts the outcomes.
    import os
    import glob

    successes = 0
    failures = 0
    # number of testcases started
    # NOTE(review): ttl is not read in the visible part of the script - confirm it is used further down
    ttl = 0

    for path in glob.glob(f'{os.path.dirname(__file__)}/*.asm'):
        print(FMT_BLUE + '[Test] running testcase ' + os.path.basename(path) + FMT_NONE)
        ttl += 1
        # run_test re-raises exceptions, so a crashing testcase aborts the whole run
        if run_test(path):
            successes += 1
        else:
            failures += 1

View File

@ -0,0 +1,7 @@
.data
data:
.word 0xFFFFFFFF, 0x0000FFFF, 0xFF00FF00, 0x7FFFFFFF
.text
ebreak

View File

@ -0,0 +1,20 @@
.text
main:
addi a0, zero, main
addi a1, zero, main
addi t0, zero, 1000
assert a0, ==, 0x100
1:
addi a1, a1, 1
blt a1, t0, 1b
sub a1, a1, a0
j 1f
addi a1, zero, 0
fail
1:
assert a1, ==, 744
add a0, zero, a1 ; set exit code to a1
addi a7, zero, SCALL_EXIT ; exit syscall code
scall
fail