From 6bc939572b254656c2d480c3ac8717a69af619df Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Sat, 17 Apr 2021 19:06:24 +0200 Subject: [PATCH] parsing and simple running works somewhat --- riscemu/CPU.py | 289 ++++++++++++++++++++++++------------ riscemu/Exceptions.py | 68 ++++++++- riscemu/Executable.py | 203 ++++++++++++++++++++++++- riscemu/ExecutableParser.py | 43 +++--- riscemu/MMU.py | 64 ++++++-- riscemu/Tokenizer.py | 19 ++- riscemu/__init__.py | 11 +- riscemu/helpers.py | 38 +++++ riscemu/main.py | 3 - run.py | 29 ++-- 10 files changed, 607 insertions(+), 160 deletions(-) create mode 100644 riscemu/helpers.py delete mode 100644 riscemu/main.py diff --git a/riscemu/CPU.py b/riscemu/CPU.py index 969d72d..3044e31 100644 --- a/riscemu/CPU.py +++ b/riscemu/CPU.py @@ -1,6 +1,14 @@ +import traceback from dataclasses import dataclass from collections import defaultdict +from .Exceptions import * +from .helpers import * + +import typing +if typing.TYPE_CHECKING: + from . import MMU, Executable, LoadedExecutable, LoadedInstruction + COLOR = True FMT_ORANGE = '\033[33m' @@ -11,11 +19,10 @@ FMT_UNDERLINE = '\033[4m' class Registers: - def __init__(self, cpu: 'CPU'): - self.cpu = cpu + def __init__(self): self.vals = defaultdict(lambda: 0) - self.last_mod = 'ft0' - self.last_access = 'a3' + self.last_mod = None + self.last_access = None def dump(self, small=False): named_regs = [self.reg_repr(reg) for reg in Registers.named_registers()] @@ -55,6 +62,9 @@ class Registers: print("\t" + " ".join(line)) print(")") + def dump_reg_a(self): + print("Registers[a]:" + " ".join(self.reg_repr('a{}'.format(i)) for i in range(8))) + def reg_repr(self, reg): txt = '{:4}=0x{:08X}'.format(reg, self.get(reg)) if reg == 'fp': @@ -74,18 +84,16 @@ class Registers: print("[Registers.set] trying to set read-only register: {}".format(reg)) return False if reg not in Registers.all_registers(): - print("[Registers.set] invalid register name: {}".format(reg)) - return False + raise InvalidRegisterException(reg) # replace fp register with s1, as these are the same register if reg == 'fp': reg = 's1' self.last_mod = reg - setattr(self, reg, val) + self.vals[reg] = val def get(self, reg): if not reg in Registers.all_registers(): - print("[Registers.get] invalid register name: {}".format(reg)) - return 0 + raise InvalidRegisterException(reg) if reg == 'fp': reg = 's0' return self.vals[reg] @@ -105,134 +113,220 @@ class Registers: return ['zero', 'ra', 'sp', 'gp', 'tp', 'fp'] class CPU: - def instruction_lb(self, instruction): - pass + def __init__(self): + from . import MMU, Executable, LoadedExecutable, LoadedInstruction + + self.mmu = MMU() + self.regs = Registers() + self.pc = 0 + self.exit = False + + self.syscall_int = SyscallInterface() + + def load(self, e: 'Executable'): + return self.mmu.load_bin(e) + + def run_loaded(self, le: 'LoadedExecutable'): + self.pc = le.run_ptr + sp, hp = le.stack_heap + self.regs.set('sp', sp) + self.regs.set('a0', hp) # set a0 to point to the heap - def instruction_lh(self, instruction): - pass + self.__run() - def instruction_lw(self, instruction): - pass + def __run(self): + if self.pc <= 0: + return False + ins = None + try: + while not self.exit: + ins = self.mmu.read_ins(self.pc) + self.pc += 1 + self.__run_instruction(ins) + except RiscemuBaseException as ex: + print("[CPU] excpetion caught at {}:".format(ins)) + print(" " + ex.message()) + traceback.print_exception(type(ex), ex, ex.__traceback__) + + def __run_instruction(self, ins: 'LoadedInstruction'): + name = 'instruction_' + ins.name + if hasattr(self, name): + getattr(self, name)(ins) + else: + raise RuntimeError("Unknown instruction: {}".format(ins)) + + def instruction_lb(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) + + def instruction_lh(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_lbu(self, instruction): - pass + def instruction_lw(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_lhu(self, instruction): - pass + def instruction_lbu(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_sb(self, instruction): - pass + def instruction_lhu(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) + + def instruction_sb(self, ins: 'LoadedInstruction'): + src = ins.get_reg(0) + if len(ins.args) == 2: + reg, imm = ins.get_imm_reg(1) + else: + reg = ins.get_reg(1) + imm = ins.get_imm(2) + addr = self.regs.get(reg) + imm + self.mmu.write(addr, 1, int_to_bytes(self.regs.get(reg), 1)) + + def instruction_sh(self, ins: 'LoadedInstruction'): + src = ins.get_reg(0) + if len(ins.args) == 2: + reg, imm = ins.get_imm_reg(1) + else: + reg = ins.get_reg(1) + imm = ins.get_imm(2) + addr = self.regs.get(reg) + imm + self.mmu.write(addr, 2, int_to_bytes(self.regs.get(reg), 2)) + + def instruction_sw(self, ins: 'LoadedInstruction'): + src = ins.get_reg(0) + if len(ins.args) == 2: + imm, reg = ins.get_imm_reg(1) + else: + reg = ins.get_reg(1) + imm = ins.get_imm(2) + addr = self.regs.get(reg) + imm + self.mmu.write(addr, 4, int_to_bytes(self.regs.get(reg), 4)) - def instruction_sh(self, instruction): - pass + def instruction_sll(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_sw(self, instruction): - pass + def instruction_slli(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_sll(self, instruction): - pass + def instruction_srl(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_slli(self, instruction): - pass + def instruction_srli(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_srl(self, instruction): - pass + def instruction_sra(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_srli(self, instruction): - pass + def instruction_srai(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_sra(self, instruction): - pass + def instruction_add(self, ins: 'LoadedInstruction'): + dst = ins.get_reg(0) + src1 = ins.get_reg(1) + src2 = ins.get_reg(2) + self.regs.set( + dst, + self.regs.get(src1) + self.regs.get(src2) + ) - def instruction_srai(self, instruction): - pass + def instruction_addi(self, ins: 'LoadedInstruction'): + dst = ins.get_reg(0) + src1 = ins.get_reg(1) + imm = ins.get_imm(2) + self.regs.set( + dst, + self.regs.get(src1) + imm + ) - def instruction_add(self, instruction): - pass + def instruction_sub(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_addi(self, instruction): - pass + def instruction_lui(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_sub(self, instruction): - pass + def instruction_auipc(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_lui(self, instruction): - pass + def instruction_xor(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_auipc(self, instruction): - pass + def instruction_xori(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_xor(self, instruction): - pass + def instruction_or(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_xori(self, instruction): - pass + def instruction_ori(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_or(self, instruction): - pass + def instruction_and(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_ori(self, instruction): - pass + def instruction_andi(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_and(self, instruction): - pass + def instruction_slt(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_andi(self, instruction): - pass + def instruction_slti(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_slt(self, instruction): - pass + def instruction_sltu(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_slti(self, instruction): - pass + def instruction_sltiu(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_sltu(self, instruction): - pass + def instruction_beq(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_sltiu(self, instruction): - pass + def instruction_bne(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_beq(self, instruction): - pass + def instruction_blt(self, ins: 'LoadedInstruction'): + ASSERT_LEN(ins.args, 3) + reg1 = ins.get_reg(0) + reg2 = ins.get_reg(1) + dest = ins.get_imm(2) + if self.regs.get(reg1) < self.regs.get(reg2): + self.pc = dest - def instruction_bne(self, instruction): - pass - def instruction_blt(self, instruction): - pass + def instruction_bge(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_bge(self, instruction): - pass + def instruction_bltu(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_bltu(self, instruction): - pass + def instruction_bgeu(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_bgeu(self, instruction): - pass + def instruction_j(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_j(self, instruction): - pass + def instruction_jr(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_jr(self, instruction): - pass + def instruction_jal(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_jal(self, instruction): - pass + def instruction_jalr(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_jalr(self, instruction): - pass + def instruction_ret(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_ret(self, instruction): - pass + def instruction_scall(self, ins: 'LoadedInstruction'): + syscall = Syscall(self.regs.get('a7'), self.regs) + self.syscall_int.handle_syscall(syscall) - def instruction_scall(self, instruction): - pass + def instruction_break(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_break(self, instruction): - pass + def instruction_nop(self, ins: 'LoadedInstruction'): + INS_NOT_IMPLEMENTED(ins) - def instruction_nop(self, instruction): - pass + def instruction_dbg(self, ins: 'LoadedInstruction'): + import code + code.interact(local=dict(globals(), **locals())) @staticmethod def all_instructions(): @@ -249,8 +343,7 @@ class Syscall: class SyscallInterface: def handle_syscall(self, scall: Syscall): - pass + print("syscall {} received!".format(scall.id)) + scall.registers.dump_reg_a() -a = Registers(None) -a.dump() diff --git a/riscemu/Exceptions.py b/riscemu/Exceptions.py index 7bcb159..7182d6d 100644 --- a/riscemu/Exceptions.py +++ b/riscemu/Exceptions.py @@ -1,4 +1,13 @@ -class ParseException(BaseException): +from abc import abstractmethod + + +class RiscemuBaseException(BaseException): + @abstractmethod + def message(self): + pass + + +class ParseException(RiscemuBaseException): def __init__(self, msg, data=None): super().__init__() self.msg = msg @@ -25,9 +34,62 @@ def ASSERT_NOT_NULL(a1): def ASSERT_NOT_IN(a1, a2): if a1 in a2: - raise ParseException("ASSERTION_FAILED: Expected {} to not be in {}".format(a1, a2), (a1,a2)) + raise ParseException("ASSERTION_FAILED: Expected {} to not be in {}".format(a1, a2), (a1, a2)) def ASSERT_IN(a1, a2): if a1 not in a2: - raise ParseException("ASSERTION_FAILED: Expected {} to not be in {}".format(a1, a2), (a1,a2)) + raise ParseException("ASSERTION_FAILED: Expected {} to not be in {}".format(a1, a2), (a1, a2)) + + +class MemoryAccessException(RiscemuBaseException): + def __init__(self, msg, addr, size, op): + super(MemoryAccessException, self).__init__() + self.msg = msg + self.addr = addr + self.size = size + self.op = op + + def message(self): + return "{}(During {} at 0x{:08x} of size {}: {})".format( + self.__class__.__name__, + self.op, + self.addr, + self.size, + self.msg + ) + + +class OutOfMemoryEsception(RiscemuBaseException): + def __init__(self, action): + self.action = action + + def message(self): + return '{}(Ran out of memory during {})'.format( + self.__class__.__name__, + self.action + ) + + +class UnimplementedInstruction(RiscemuBaseException): + def __init__(self, ins: 'LoadedInstruction'): + self.ins = ins + + def message(self): + return "{}({})".format( + self.__class__.__name__, + repr(self.ins) + ) + +class InvalidRegisterException(RiscemuBaseException): + def __init__(self, reg): + self.reg = reg + + def message(self): + return "{}(Invalid register {})".format( + self.__class__.__name__, + self.reg + ) + +def INS_NOT_IMPLEMENTED(ins): + raise UnimplementedInstruction(ins) \ No newline at end of file diff --git a/riscemu/Executable.py b/riscemu/Executable.py index 94fb995..62992a6 100644 --- a/riscemu/Executable.py +++ b/riscemu/Executable.py @@ -1,7 +1,18 @@ from dataclasses import dataclass, field -from typing import Dict, List, Tuple -from . import MemoryFlags, RiscVInstructionToken, RiscVTokenizer, RiscVSymbolToken, RiscVPseudoOpToken +from typing import Dict, List, Tuple, Union, Optional from .Exceptions import * +from .helpers import parse_numeric_argument, align_addr + +import typing +if typing.TYPE_CHECKING: + from .Tokenizer import RiscVInstructionToken + + + +@dataclass(frozen=True) +class MemoryFlags: + read_only: bool + executable: bool @dataclass @@ -9,25 +20,201 @@ class MemorySection: name: str flags: MemoryFlags size: int = 0 - start: int = -1 content: List[bytearray] = field(default_factory=list) def add(self, data: bytearray): self.content.append(data) self.size += len(data) + def continuous_content(self, parent: 'LoadedExecutable'): + """ + converts the content into one continuous bytearray + """ + if self.size == 0: + return bytearray(0) + content = self.content[0] + for b in self.content[1:]: + content += b + return content + +@dataclass class InstructionMemorySection(MemorySection): - insn: List[RiscVInstructionToken] = field(default_factory=list) + content: List['RiscVInstructionToken'] = field(default_factory=list) - def add_insn(self, insn: RiscVInstructionToken): - self.insn.append(insn) - self.size += 4 + def add_insn(self, insn: 'RiscVInstructionToken'): + self.content.append(insn) + self.size += 1 + def continuous_content(self, parent: 'LoadedExecutable'): + return [ + LoadedInstruction(ins.instruction, ins.args, parent) + for ins in self.content + ] -@dataclass + +@dataclass(frozen=True) class Executable: run_ptr: Tuple[str, int] sections: Dict[str, MemorySection] symbols: Dict[str, Tuple[str, int]] + stack_pref: Optional[int] + + +### LOADING CODE + + +@dataclass(frozen=True) +class LoadedInstruction: + """ + An instruction which is loaded into memory. It knows the binary it belongs to to resolve symbols + """ + name: str + args: List[str] + bin: 'LoadedExecutable' + + def get_imm(self, num: int): + """ + parse and get immediate argument + """ + if len(self.args) <= num: + raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args)) + arg = self.args[num] + # look up symbols + if arg in self.bin.symbols: + return self.bin.symbols[arg] + return parse_numeric_argument(arg) + + def get_imm_reg(self, num: int): + """ + parse and get an argument imm(reg) + """ + if len(self.args) <= num: + raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args)) + arg = self.args[num] + ASSERT_IN("(", arg) + imm, reg = arg[:-1].split("(") + if imm in self.bin.symbols: + return self.bin.symbols[imm], reg + return parse_numeric_argument(imm), reg + + def get_reg(self, num: int): + """ + parse and get an register argument + """ + if len(self.args) <= num: + raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args)) + return self.args[num] + + def __repr__(self): + return "{} {}".format(self.name, ", ".join(self.args)) + + +@dataclass(frozen=True) +class LoadedMemorySection: + """ + A section which is loaded into memory + """ + name: str + base: int + size: int + content: Union[List[LoadedInstruction], bytearray] + flags: MemoryFlags + + def read(self, offset: int, size: int): + if offset < 0: + raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, size, 'read') + if offset + size >= self.size: + raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset, + size, 'read') + return self.content[offset: offset + size] + + def read_instruction(self, offset): + if not self.flags.executable: + raise MemoryAccessException('Section not executable!', self.base + offset, 1, 'read exec') + + if offset < 0: + raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'read exec') + if offset >= self.size: + raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset, + 1, 'read exec') + return self.content[offset] + + def write(self, offset, size, data): + if self.flags.read_only: + raise MemoryAccessException('Section not writeable {}'.format(self.name), self.base + offset, size, 'write') + + if offset < 0: + raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'write') + if offset >= self.size: + raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset, + size, 'write') + + for i in range(size): + self.content[offset + i] = data[i] + + +class LoadedExecutable: + """ + This represents an executable which is loaded into memory at address base_addr + + This is basicalle the "loader" in normal system environments + It initializes the stack and heap + + It still holds a symbol table, that is not accessible memory since I don't want to deal with + binary strings in memory etc. + """ + base_addr: int + sections_by_name: Dict[str, LoadedMemorySection] + sections: List[LoadedMemorySection] + symbols: Dict[str, int] + run_ptr: int + stack_heap: Tuple[int, int] # pointers to stack and heap, are nullptr if no stack/heap is available + + def __init__(self, exe: Executable, base_addr: int): + self.base_addr = base_addr + self.sections = list() + self.sections_by_name = dict() + self.symbols = dict() + + # stack/heap if wanted + if exe.stack_pref is not None: + self.sections.append(LoadedMemorySection( + 'stack', + base_addr, + exe.stack_pref, + bytearray(exe.stack_pref), + MemoryFlags(read_only=False, executable=False) + )) + self.stack_heap = (self.base_addr, self.base_addr + exe.stack_pref) + else: + self.stack_heap = (0, 0) + + curr = base_addr + for sec in exe.sections.values(): + loaded_sec = LoadedMemorySection( + sec.name, + curr, + sec.size, + sec.continuous_content(self), + sec.flags + ) + self.sections.append(loaded_sec) + self.sections_by_name[loaded_sec.name] = loaded_sec + curr = align_addr(loaded_sec.size + curr) + + for name, (sec_name, offset) in exe.symbols.items(): + ASSERT_IN(sec_name, self.sections_by_name) + self.symbols[name] = self.sections_by_name[sec_name].base + offset + + self.size = curr - base_addr + + # translate run_ptr from executable + run_ptr_sec, run_ptr_off = exe.run_ptr + self.run_ptr = self.sections_by_name[run_ptr_sec].base + run_ptr_off + print("successfully loaded binary\n\tsize: {}\n\tsections: {}\n\trun_ptr: 0x{:08x}".format( + self.size, + " ".join(self.sections_by_name.keys()), + self.run_ptr + )) diff --git a/riscemu/ExecutableParser.py b/riscemu/ExecutableParser.py index 5eacdc2..1fcb100 100644 --- a/riscemu/ExecutableParser.py +++ b/riscemu/ExecutableParser.py @@ -1,25 +1,23 @@ +from .helpers import parse_numeric_argument from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags from .Exceptions import * -from .Tokenizer import RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, RiscVPseudoOpToken -from typing import Dict, Tuple, List +from .Tokenizer import RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, RiscVPseudoOpToken +from typing import Dict, Tuple, List, Optional -def parse_numeric_argument(arg: str): - if arg.startswith('0x') or arg.startswith('0X'): - return int(arg, 16) - return int(arg) class ExecutableParser: - tokenizer: RiscVTokenizer + tokenizer: 'RiscVTokenizer' - def __init__(self, tokenizer: RiscVTokenizer): + def __init__(self, tokenizer: 'RiscVTokenizer'): self.instructions: List[RiscVInstructionToken] = list() self.symbols: Dict[str, Tuple[str, int]] = dict() self.sections: Dict[str, MemorySection] = dict() self.tokenizer = tokenizer - self.active_section = None + self.active_section: Optional[str] = None self.implicit_sections = False + self.stack_pref: Optional[int] = None def parse(self): for token in self.tokenizer.tokens: @@ -36,9 +34,9 @@ class ExecutableParser: start_ptr = self.symbols['_start'] elif 'main' in self.symbols: start_ptr = self.symbols['main'] - return Executable(start_ptr, self.sections, self.symbols) + return Executable(start_ptr, self.sections, self.symbols, self.stack_pref) - def parse_instruction(self, ins: RiscVInstructionToken): + def parse_instruction(self, ins: 'RiscVInstructionToken'): if self.active_section is None: self.op_text() self.implicit_sections = True @@ -50,12 +48,12 @@ class ExecutableParser: else: raise ParseException("SHOULD NOT BE REACHED") - def handle_symbol(self, token: RiscVSymbolToken): + def handle_symbol(self, token: 'RiscVSymbolToken'): ASSERT_NOT_IN(token.name, self.symbols) sec_pos = self.curr_sec().size self.symbols[token.name] = (self.active_section, sec_pos) - def handle_pseudo_op(self, op: RiscVPseudoOpToken): + def handle_pseudo_op(self, op: 'RiscVPseudoOpToken'): name = 'op_' + op.name if hasattr(self, name): getattr(self, name)(op) @@ -63,39 +61,44 @@ class ExecutableParser: raise ParseException("Unknown pseudo op: {}".format(op), (op,)) ## Pseudo op implementations: - def op_section(self, op: RiscVPseudoOpToken): + def op_section(self, op: 'RiscVPseudoOpToken'): ASSERT_LEN(op.args, 1) name = op.args[0][1:] ASSERT_IN(name, ('data', 'rodata', 'text')) getattr(self, 'op_' + name)(op) - def op_text(self, op: RiscVPseudoOpToken = None): + def op_text(self, op: 'RiscVPseudoOpToken' = None): self.set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection) - def op_data(self, op: RiscVPseudoOpToken = None): + def op_data(self, op: 'RiscVPseudoOpToken' = None): self.set_sec('data', MemoryFlags(read_only=False, executable=False)) - def op_rodata(self, op: RiscVPseudoOpToken = None): + def op_rodata(self, op: 'RiscVPseudoOpToken' = None): self.set_sec('rodata', MemoryFlags(read_only=True, executable=False)) - def op_space(self, op: RiscVPseudoOpToken): + def op_space(self, op: 'RiscVPseudoOpToken'): ASSERT_IN(self.active_section, ('data', 'rodata')) ASSERT_LEN(op.args, 1) size = parse_numeric_argument(op.args[0]) self.curr_sec().add(bytearray(size)) - def op_ascii(self, op: RiscVPseudoOpToken): + def op_ascii(self, op: 'RiscVPseudoOpToken'): ASSERT_IN(self.active_section, ('data', 'rodata')) ASSERT_LEN(op.args, 1) str = op.args[0][1:-1] self.curr_sec().add(bytearray(str, 'ascii')) - def op_asciiz(self, op: RiscVPseudoOpToken): + def op_asciiz(self, op: 'RiscVPseudoOpToken'): ASSERT_IN(self.active_section, ('data', 'rodata')) ASSERT_LEN(op.args, 1) str = op.args[0][1:-1] self.curr_sec().add(bytearray(str + '\0', 'ascii')) + def op_stack(self, op: 'RiscVPseudoOpToken'): + ASSERT_LEN(op.args, 1) + size = parse_numeric_argument(op.args) + self.stack_pref = size + ## Section handler code def set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection): if name not in self.sections: diff --git a/riscemu/MMU.py b/riscemu/MMU.py index c8805b8..3a83c27 100644 --- a/riscemu/MMU.py +++ b/riscemu/MMU.py @@ -1,15 +1,61 @@ from dataclasses import dataclass -@dataclass(frozen=True) -class MemoryFlags: - read_only: bool - executable: bool - -class MemoryRegion: - addr:int - len:int - flags: MemoryFlags +from .Executable import Executable, LoadedExecutable, LoadedMemorySection +from .helpers import align_addr +from .Exceptions import OutOfMemoryEsception +from typing import Dict, List, Tuple, Optional class MMU: + max_size = 0xFFFFFFFF + # make each block accessible by it's base addr + sections: List[LoadedMemorySection] + + binaries: List[LoadedExecutable] + last_bin: Optional[LoadedExecutable] = None + def __init__(self): + self.sections = list() + self.binaries = list() + self.last_bin = None + + def load_bin(self, bin: Executable): + if self.last_bin is None: + addr = 0x100 # start at 0x100 instead of 0x00 + else: + addr = self.last_bin.size + self.last_bin.base_addr + # align to 8 byte word + addr = align_addr(addr) + + loaded_bin = LoadedExecutable(bin, addr) + + if loaded_bin.size + addr > self.max_size: + raise OutOfMemoryEsception('load of executable') + + self.binaries.append(loaded_bin) + self.last_bin = loaded_bin + + # read sections into sec dict + for sec in loaded_bin.sections: + self.sections.append(sec) + + return loaded_bin + + def get_sec_containing(self, addr: int): + for sec in self.sections: + if sec.base <= addr < sec.base + sec.size: + return sec + + def read_ins(self, addr: int): + sec = self.get_sec_containing(addr) + return sec.read_instruction(addr - sec.base) + + def read(self, addr: int, size: int): + sec = self.get_sec_containing(addr) + return sec.read(addr - sec.base, size) + + def write(self, addr: int, size: int, data): + sec = self.get_sec_containing(addr) + return sec.write(addr - sec.base, size, data) + + diff --git a/riscemu/Tokenizer.py b/riscemu/Tokenizer.py index 00c3d16..c870ba4 100644 --- a/riscemu/Tokenizer.py +++ b/riscemu/Tokenizer.py @@ -2,7 +2,7 @@ import re from enum import IntEnum from typing import List -from . import CPU, Registers +from .CPU import CPU, Registers REGISTERS = list(Registers.all_registers()) @@ -105,10 +105,14 @@ class RiscVInput: return self.content[at:at + size] def peek_one_of(self, options: List[str]): + longest_peek = 0 + ret = False for text in options: if self.peek(text=text): - return text - return False + if len(text) > longest_peek: + longest_peek = len(text) + ret = text + return ret def consume(self, size: int = 1, regex: re.Pattern = None, text: str = None, regex_group: int = 0): at = self.pos @@ -138,10 +142,15 @@ class RiscVInput: return self.content[at:at + size] def consume_one_of(self, options: List[str]): + longest_peek = 0 + ret = False for text in options: if self.peek(text=text): - return self.consume(text=text) - return False + if len(text) > longest_peek: + longest_peek = len(text) + ret = text + self.consume(text=ret) + return ret def seek_newline(self): return self.consume(regex=REG_WHITESPACE_UNTIL_NEWLINE, regex_group=1) diff --git a/riscemu/__init__.py b/riscemu/__init__.py index da9547e..7277732 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -1,10 +1,13 @@ -from .CPU import CPU, Registers, Syscall, SyscallInterface +from .Exceptions import ASSERT_NOT_NULL, ASSERT_LEN, ASSERT_IN, ASSERT_EQ, ASSERT_NOT_IN from .Tokenizer import RiscVToken, RiscVInput, RiscVTokenizer, RiscVInstructionToken, RiscVSymbolToken, \ RiscVPseudoOpToken, TokenType -from .MMU import MemoryFlags, MemoryRegion, MMU -from .Exceptions import ASSERT_NOT_NULL, ASSERT_LEN, ASSERT_IN, ASSERT_EQ, ASSERT_NOT_IN +from .Executable import Executable, LoadedExecutable, LoadedMemorySection, LoadedInstruction + +from .ExecutableParser import ExecutableParser + +from .MMU import MMU -from .Executable import ExecutableParser, Executable +from .CPU import CPU, Registers, Syscall, SyscallInterface \ No newline at end of file diff --git a/riscemu/helpers.py b/riscemu/helpers.py new file mode 100644 index 0000000..2765d1c --- /dev/null +++ b/riscemu/helpers.py @@ -0,0 +1,38 @@ +def align_addr(addr: int, to_bytes: int = 8): + """ + align an address to `to_bytes` (meaning addr & to_bytes = 0) + """ + return addr + (-addr % to_bytes) + + +def parse_numeric_argument(arg: str): + """ + parse hex or int strings + """ + if arg.startswith('0x') or arg.startswith('0X'): + return int(arg, 16) + return int(arg) + + +def int_to_bytes(val, bytes=4): + """ + int -> byte (two's complement) + """ + return bytearray([ + (val >> ((bytes-i-1) * 8)) & 0xFF for i in range(bytes) + ]) + + +def int_from_bytes(bytes): + """ + byte -> int (two's complement) + """ + num = 0 + for b in bytes: + num = num << 8 + num += b + sign = num >> (len(bytes) * 8 - 1) + if sign: + return num - 2 ** (8 * len(bytes)) + return num + diff --git a/riscemu/main.py b/riscemu/main.py deleted file mode 100644 index 66dd644..0000000 --- a/riscemu/main.py +++ /dev/null @@ -1,3 +0,0 @@ -from .CPU import * -from .Tokenizer import * - diff --git a/run.py b/run.py index 96c492a..9fe026a 100644 --- a/run.py +++ b/run.py @@ -7,20 +7,21 @@ fibs: .space 56 .text main: - add s1, zero, 0 # storage index - add s2, zero, 56 # last storage index - add t0, zero, 1 # t0 = F_{i} - add t1, zero, 1 # t1 = F_{i+1} + addi s1, zero, 0 # storage index + addi s2, zero, 56 # last storage index + addi t0, zero, 1 # t0 = F_{i} + addi t1, zero, 1 # t1 = F_{i+1} loop: sw t0, fibs(s1) # save add t2, t1, t0 # t2 = F_{i+2} - add t0, t1, 0 # t0 = t1 - add t1, t2, 0 # t1 = t2 - add s1, s1, 4 # increment storage pointer + addi t0, t1, 0 # t0 = t1 + addi t1, t2, 0 # t1 = t2 + addi s1, s1, 4 # increment storage pointer blt s1, s2, loop # loop as long as we did not reach array length # exit gracefully - add a0, zero, 0 - add a7, zero, 93 + addi a0, zero, 0 + addi a7, zero, 93 + dbg # launch debugger scall # exit with code 0 """ tk = RiscVTokenizer(RiscVInput(example_progr)) @@ -33,5 +34,13 @@ loop: ep = ExecutableParser(tk) ep.parse() - print(ep) + exe = ep.get_execuable() + + cpu = CPU() + le = cpu.load(exe) + + cpu.run_loaded(le) + + print('a') +