From 1c2dad94e29104a4ee6c2a57805ab6b0a1c441a2 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Mon, 29 May 2023 16:04:55 +0100 Subject: [PATCH] main: major overhaul of how __main__ works --- .idea/misc.xml | 2 +- CHANGELOG.md | 7 +- examples/fibs.asm | 28 ++-- examples/malloc.asm | 44 ++++++ libc/README.md | 2 +- libc/stdlib.s | 6 +- riscemu/__init__.py | 2 +- riscemu/__main__.py | 186 ++-------------------- riscemu/config.py | 6 +- riscemu/riscemu_main.py | 254 ++++++++++++++++++++++++++++++ riscemu/tools/riscemu | 2 + setup.py | 8 +- test/filecheck/.gitignore | 2 +- test/filecheck/fibs.asm | 7 +- test/filecheck/libc/test-string.s | 2 +- 15 files changed, 350 insertions(+), 208 deletions(-) create mode 100644 examples/malloc.asm create mode 100644 riscemu/riscemu_main.py create mode 100755 riscemu/tools/riscemu diff --git a/.idea/misc.xml b/.idea/misc.xml index a15ea67..998148d 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - \ No newline at end of file + diff --git a/CHANGELOG.md b/CHANGELOG.md index bf42c94..ee08d10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,13 @@ ## Upcoming 2.0.6 + - Added a very basic libc containing a `crt0.s`, and a few functions + such as `malloc`, `rand`, and `memcpy`. 
+ - Added a subset of the `mmap2` syscall (code 192) to allocate new memory + - Refactored the launching code to make riscemu easier to use from code + **Planned:** - Add a floating point unit - - Add a crt0.s - - Add `mmap2` syscall with code 192 ## 2.0.5 diff --git a/examples/fibs.asm b/examples/fibs.asm index 48a46e5..893cda7 100644 --- a/examples/fibs.asm +++ b/examples/fibs.asm @@ -1,23 +1,23 @@ -; Example program (c) by Anton Lydike -; this calculates the fibonacci sequence and stores it in ram +// Example program (c) by Anton Lydike +// this calculates the fibonacci sequence and stores it in ram .data fibs: .space 56 .text main: - addi s1, zero, 0 ; storage index - addi s2, zero, 56 ; last storage index - addi t0, zero, 1 ; t0 = F_{i} - addi t1, zero, 1 ; t1 = F_{i+1} + addi s1, zero, 0 // storage index + addi s2, zero, 56 // last storage index + addi t0, zero, 1 // t0 = F_{i} + addi t1, zero, 1 // t1 = F_{i+1} loop: - sw t0, fibs(s1) ; save - add t2, t1, t0 ; t2 = F_{i+2} - addi t0, t1, 0 ; t0 = t1 - addi t1, t2, 0 ; t1 = t2 - addi s1, s1, 4 ; increment storage pointer - blt s1, s2, loop ; loop as long as we did not reach array length - ; exit gracefully + sw t0, fibs(s1) // save + add t2, t1, t0 // t2 = F_{i+2} + addi t0, t1, 0 // t0 = t1 + addi t1, t2, 0 // t1 = t2 + addi s1, s1, 4 // increment storage pointer + blt s1, s2, loop // loop as long as we did not reach array length + // exit gracefully addi a0, zero, 0 addi a7, zero, 93 - scall ; exit with code 0 + scall // exit with code 0 diff --git a/examples/malloc.asm b/examples/malloc.asm new file mode 100644 index 0000000..653d6e6 --- /dev/null +++ b/examples/malloc.asm @@ -0,0 +1,44 @@ +// example of a simple memory allocation +// we use the mmap2 syscall for this + +.text + // call mmap2 + li a0, 0 // addr = 0, let OS choose address + li a1, 4096 // size + li a2, 3 // PROT_READ | PROT_WRITE + li a3, 5 // MAP_PRIVATE | MAP_ANONYMOUS + li a7, SCALL_MMAP2 + ecall // invoke syscall + + li t0, -1 // exit if
unsuccessful + beq a0, t0, _exit + + // print address + print.uhex a0 + # we can look at the state of the mmu here: + ebreak + # > mmu.sections + # InstructionMemorySection[.text] at 0x00000100 + # BinaryDataMemorySection[.stack] at 0x00000170 + # BinaryDataMemorySection[.data.runtime-allocated] at 0x00080170 + + sw t0, 144(a0) + sw t0, 0(a0) + sw t0, 8(a0) + sw t0, 16(a0) + sw t0, 32(a0) + sw t0, 64(a0) + sw t0, 128(a0) + sw t0, 256(a0) + sw t0, 512(a0) + sw t0, 1024(a0) + sw t0, 2048(a0) + sw t0, 4000(a0) + + lw t1, 128(a0) + print.uhex t0 + ebreak + +_exit: + li a7, 93 + ecall diff --git a/libc/README.md b/libc/README.md index 57cab3b..b733124 100644 --- a/libc/README.md +++ b/libc/README.md @@ -26,4 +26,4 @@ Somewhat nice implementations of: ## Correctness: -This library is only lightly tested, so be careful and report bugs when you find them! \ No newline at end of file +This library is only lightly tested, so be careful and report bugs when you find them! diff --git a/libc/stdlib.s b/libc/stdlib.s index 6845c73..9d506e9 100644 --- a/libc/stdlib.s +++ b/libc/stdlib.s @@ -6,7 +6,7 @@ .data -_rand_seed: +_rand_seed: .word 0x76767676 _atexit_calls: // leave room for 8 atexit handlers here for now @@ -152,7 +152,7 @@ _atexit_fail: - + // rand, srand @@ -176,5 +176,3 @@ srand: la t1, _rand_seed sw a0, 0(t1) ret - - diff --git a/riscemu/__init__.py b/riscemu/__init__.py index 13deeba..96e7339 100644 --- a/riscemu/__init__.py +++ b/riscemu/__init__.py @@ -33,5 +33,5 @@ from .config import RunConfig from .parser import tokenize, parse_tokens, AssemblyFileLoader __author__ = "Anton Lydike " -__copyright__ = "Copyright 2022 Anton Lydike" +__copyright__ = "Copyright 2023 Anton Lydike" __version__ = "2.0.5" diff --git a/riscemu/__main__.py b/riscemu/__main__.py index 8387e0e..b7e41b3 100644 --- a/riscemu/__main__.py +++ b/riscemu/__main__.py @@ -5,182 +5,18 @@ SPDX-License-Identifier: MIT This file holds the logic for starting the emulator from the CLI """ -from riscemu 
import RiscemuBaseException, __copyright__, __version__ -from riscemu.CPU import UserModeCPU +import sys -if __name__ == "__main__": - from .config import RunConfig - from .instructions import InstructionSetDict - from .colors import FMT_BOLD, FMT_MAGENTA - from .parser import AssemblyFileLoader - import argparse - import sys +from riscemu import RiscemuBaseException +from riscemu.riscemu_main import RiscemuMain - all_ins_names = list(InstructionSetDict.keys()) +try: + main = RiscemuMain() + main.run(sys.argv[1:]) + sys.exit(main.cpu.exit_code if not main.cfg.ignore_exit_code else 0) - if "--version" in sys.argv: - print( - "riscemu version {}\n{}\n\nAvailable ISA: {}".format( - __version__, __copyright__, ", ".join(InstructionSetDict.keys()) - ) - ) - sys.exit() +except RiscemuBaseException as e: + print("Error: {}".format(e.message())) + e.print_stacktrace() - class OptionStringAction(argparse.Action): - def __init__(self, option_strings, dest, keys=None, omit_empty=False, **kwargs): - if keys is None: - raise ValueError('must define "keys" argument') - if isinstance(keys, dict): - keys_d = keys - elif isinstance(keys, (list, tuple)): - keys_d = {} - for k in keys: - if isinstance(k, tuple): - k, v = k - else: - v = False - keys_d[k] = v - else: - keys_d = dict() - super().__init__(option_strings, dest, default=keys_d, **kwargs) - self.keys = keys_d - self.omit_empty = omit_empty - - def __call__(self, parser, namespace, values, option_string=None): - d = {} - if not self.omit_empty: - d.update(self.keys) - for x in values.split(","): - if x in self.keys: - d[x] = True - else: - raise ValueError("Invalid parameter supplied: " + x) - setattr(namespace, self.dest, d) - - parser = argparse.ArgumentParser( - description="RISC-V Userspace parser and emulator", - prog="riscemu", - formatter_class=argparse.RawTextHelpFormatter, - ) - parser.add_argument( - "files", - metavar="file.asm", - type=str, - nargs="+", - help="The assembly files to load, the last one will be 
run", - ) - - parser.add_argument( - "--options", - "-o", - action=OptionStringAction, - keys=( - "disable_debug", - "no_syscall_symbols", - "fail_on_ex", - "add_accept_imm", - "unlimited_regs", - ), - help="""Toggle options. Available options are: -disable_debug: Disable ebreak instructions -no_syscall_symbols: Don't add symbols for SCALL_EXIT and others -fail_on_ex: If set, exceptions won't trigger the debugger -add_accept_imm: Accept "add rd, rs, imm" instruction (instead of addi) -unlimited_regs: Allow an unlimited number of registers""", - ) - - parser.add_argument( - "--syscall-opts", - "-so", - action=OptionStringAction, - keys=("fs_access", "disable_input"), - ) - - parser.add_argument( - "--instruction-sets", - "-is", - action=OptionStringAction, - help="Instruction sets to load, available are: {}. All are enabled by default".format( - ", ".join(all_ins_names) - ), - keys={k: True for k in all_ins_names}, - omit_empty=True, - ) - - parser.add_argument( - "--stack_size", - type=int, - help="Stack size of loaded programs, defaults to 8MB", - nargs="?", - ) - - parser.add_argument( - "-v", - "--verbose", - help="Verbosity level (can be used multiple times)", - action="count", - default=0, - ) - - parser.add_argument( - "--interactive", - help="Launch the interactive debugger instantly instead of loading any " - "programs", - action="store_true", - ) - - parser.add_argument( - "--ignore-exit-code", - help="Ignore exit code of the program and always return 0 if the program ran to completion.", - action="store_true", - default=False, - ) - - args = parser.parse_args() - - # create a RunConfig from the cli args - cfg_dict = dict( - stack_size=args.stack_size, - debug_instruction=not args.options["disable_debug"], - include_scall_symbols=not args.options["no_syscall_symbols"], - debug_on_exception=not args.options["fail_on_ex"], - add_accept_imm=args.options["add_accept_imm"], - unlimited_registers=args.options["unlimited_regs"], - 
scall_fs=args.syscall_opts["fs_access"], - scall_input=not args.syscall_opts["disable_input"], - verbosity=args.verbose, - ) - for k, v in dict(cfg_dict).items(): - if v is None: - del cfg_dict[k] - - cfg = RunConfig(**cfg_dict) - - if not hasattr(args, "ins"): - setattr(args, "ins", {k: True for k in all_ins_names}) - - FMT_PRINT = FMT_BOLD + FMT_MAGENTA - - # parse required instruction sets - ins_to_load = [InstructionSetDict[name] for name, b in args.ins.items() if b] - - try: - cpu = UserModeCPU(ins_to_load, cfg) - - opts = AssemblyFileLoader.get_options(sys.argv) - for file in args.files: - loader = AssemblyFileLoader.instantiate(file, opts) - cpu.load_program(loader.parse()) - - # set up a stack - cpu.setup_stack(cfg.stack_size) - - # launch the last loaded program - cpu.launch(verbose=cfg.verbosity > 1) - sys.exit(cpu.exit_code if not args.ignore_exit_code else 0) - - except RiscemuBaseException as e: - print("Error: {}".format(e.message())) - e.print_stacktrace() - - sys.exit(-1) + sys.exit(-1) diff --git a/riscemu/config.py b/riscemu/config.py index 9bda381..ee80a96 100644 --- a/riscemu/config.py +++ b/riscemu/config.py @@ -21,6 +21,6 @@ class RunConfig: verbosity: int = 0 slowdown: float = 1 unlimited_registers: bool = False - - -CONFIG = RunConfig() + # runtime config + use_libc: bool = False + ignore_exit_code: bool = False diff --git a/riscemu/riscemu_main.py b/riscemu/riscemu_main.py new file mode 100644 index 0000000..eca24a6 --- /dev/null +++ b/riscemu/riscemu_main.py @@ -0,0 +1,254 @@ +import argparse +import glob +import os +import sys +from typing import List, Type + +from riscemu import AssemblyFileLoader, __version__, __copyright__ +from riscemu.types import CPU, ProgramLoader, Program +from riscemu.instructions import InstructionSet, InstructionSetDict +from riscemu.config import RunConfig +from riscemu.CPU import UserModeCPU + + +class RiscemuMain: + """ + This represents the riscemu API exposed to other programs for better + 
interoperability. + """ + + available_ins_sets: dict[str, type[InstructionSet]] + available_file_loaders: list[type[ProgramLoader]] + + cfg: RunConfig | None + cpu: CPU | None + + input_files: list[str] + selected_ins_sets: list[Type[InstructionSet]] + + def __init__(self): + self.available_ins_sets = dict() + self.selected_ins_sets = [] + self.available_file_loaders = [] + self.cfg: RunConfig | None = None + self.cpu: CPU | None = None + self.input_files = [] + self.selected_ins_sets = [] + + def instantiate_cpu(self): + self.cpu = UserModeCPU(self.selected_ins_sets, self.cfg) + self.configure_cpu() + + def configure_cpu(self): + assert self.cfg is not None + if isinstance(self.cpu, UserModeCPU) and self.cfg.stack_size != 0: + self.cpu.setup_stack(self.cfg.stack_size) + + def register_all_arguments(self, parser: argparse.ArgumentParser): + parser.add_argument( + "files", + metavar="file.asm", + type=str, + nargs="+", + help="The assembly files to load, the last one will be run", + ) + + parser.add_argument( + "--options", + "-o", + action=OptionStringAction, + keys=( + "disable_debug", + "no_syscall_symbols", + "fail_on_ex", + "add_accept_imm", + "unlimited_regs", + "libc", + "ignore_exit_code", + ), + help="""Toggle options. Available options are: + disable_debug: Disable ebreak instructions + no_syscall_symbols: Don't add symbols for SCALL_EXIT and others + fail_on_ex: If set, exceptions won't trigger the debugger + add_accept_imm: Accept "add rd, rs, imm" instruction (instead of addi) + unlimited_regs: Allow an unlimited number of registers + libc: Load a libc-like runtime (for malloc, etc.) + ignore_exit_code: Don't exit with the programs exit code.""", + ) + + parser.add_argument( + "--syscall-opts", + "-so", + action=OptionStringAction, + keys=("fs_access", "disable_input"), + ) + + parser.add_argument( + "--instruction-sets", + "-is", + action=OptionStringAction, + help="Instruction sets to load, available are: {}. 
All are enabled by default".format( + ", ".join(self.available_ins_sets) + ), + keys={k: True for k in self.available_ins_sets}, + omit_empty=True, + ) + + parser.add_argument( + "--stack_size", + type=int, + help="Stack size of loaded programs, defaults to 8MB", + nargs="?", + ) + + parser.add_argument( + "-v", + "--verbose", + help="Verbosity level (can be used multiple times)", + action="count", + default=0, + ) + + parser.add_argument( + "--interactive", + help="Launch the interactive debugger instantly instead of loading any " + "programs", + action="store_true", + ) + + parser.add_argument( + "--ignore-exit-code", + help="Ignore exit code of the program and always return 0 if the program ran to completion.", + action="store_true", + default=False, + ) + + def register_all_isas(self): + self.available_ins_sets.update(InstructionSetDict) + + def register_all_program_loaders(self): + self.available_file_loaders.append(AssemblyFileLoader) + + def parse_argv(self, argv: list[str]): + parser = argparse.ArgumentParser( + description="RISC-V Userspace emulator", + prog="riscemu", + formatter_class=argparse.RawTextHelpFormatter, + ) + if "--version" in argv: + print( + "riscemu version {}\n{}\n\nAvailable ISA: {}".format( + __version__, __copyright__, ", ".join(self.available_ins_sets) + ) + ) + sys.exit() + + self.register_all_arguments(parser) + + # parse argv + args = parser.parse_args(argv) + + # add ins + if not hasattr(args, "ins"): + setattr(args, "ins", {k: True for k in self.available_ins_sets}) + + # create RunConfig + self.cfg = self.config_from_parsed_args(args) + + # set input files + self.input_files = args.files + + # get selected ins sets + self.selected_ins_sets = list( + self.available_ins_sets[name] + for name, selected in args.ins.items() + if selected + ) + + # if use_libc is given, attach libc to path + if self.cfg.use_libc: + libc_path = os.path.join( + os.path.dirname(__file__), + "..", + "libc", + "*.s", + ) + for path in 
glob.iglob(libc_path): + self.input_files.append(path) + + def config_from_parsed_args(self, args: argparse.Namespace) -> RunConfig: + # create a RunConfig from the cli args + cfg_dict = dict( + stack_size=args.stack_size, + debug_instruction=not args.options["disable_debug"], + include_scall_symbols=not args.options["no_syscall_symbols"], + debug_on_exception=not args.options["fail_on_ex"], + add_accept_imm=args.options["add_accept_imm"], + unlimited_registers=args.options["unlimited_regs"], + scall_fs=args.syscall_opts["fs_access"], + scall_input=not args.syscall_opts["disable_input"], + verbosity=args.verbose, + use_libc=args.options["libc"], + ignore_exit_code=args.options["ignore_exit_code"], + ) + for k, v in dict(cfg_dict).items(): + if v is None: + del cfg_dict[k] + + return RunConfig(**cfg_dict) + + def load_programs(self): + for path in self.input_files: + for loader in self.available_file_loaders: + if not loader.can_parse(path): + continue + programs = loader.instantiate(path, {}).parse() + if isinstance(programs, Program): + programs = [programs] + for p in programs: + self.cpu.mmu.load_program(p) + + def run(self, argv: list[str]): + # register everything + self.register_all_isas() + self.register_all_program_loaders() + + # parse argv and set up cpu + self.parse_argv(argv) + self.instantiate_cpu() + self.load_programs() + + # run the program + self.cpu.launch(self.cfg.verbosity > 1) + + +class OptionStringAction(argparse.Action): + def __init__(self, option_strings, dest, keys=None, omit_empty=False, **kwargs): + if keys is None: + raise ValueError('must define "keys" argument') + if isinstance(keys, dict): + keys_d = keys + elif isinstance(keys, (list, tuple)): + keys_d = {} + for k in keys: + if isinstance(k, tuple): + k, v = k + else: + v = False + keys_d[k] = v + else: + keys_d = dict() + super().__init__(option_strings, dest, default=keys_d, **kwargs) + self.keys = keys_d + self.omit_empty = omit_empty + + def __call__(self, parser, namespace, 
values, option_string=None): + d = {} + if not self.omit_empty: + d.update(self.keys) + for x in values.split(","): + if x in self.keys: + d[x] = True + else: + raise ValueError("Invalid parameter supplied: " + x) + setattr(namespace, self.dest, d) diff --git a/riscemu/tools/riscemu b/riscemu/tools/riscemu new file mode 100755 index 0000000..516f5f0 --- /dev/null +++ b/riscemu/tools/riscemu @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +python3 -m riscemu "$@" diff --git a/setup.py b/setup.py index cb3b6ae..5bc0493 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setuptools.setup( version=riscemu.__version__, author=riscemu.__author__, author_email="pip@antonlydike.de", - description="RISC-V userspace and privileged emulator", + description="RISC-V userspace and machine mode emulator", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/antonlydike/riscemu", @@ -31,6 +31,10 @@ setuptools.setup( "riscemu.priv", "riscemu.types", ], - python_requires=">=3.6", + package_data={ + "riscemu": ["libc/*.s"], + }, + scripts=["riscemu/tools/riscemu"], + python_requires=">=3.8", install_requires=["pyelftools~=0.27"], ) diff --git a/test/filecheck/.gitignore b/test/filecheck/.gitignore index c24e36d..79a8994 100644 --- a/test/filecheck/.gitignore +++ b/test/filecheck/.gitignore @@ -1,2 +1,2 @@ .lit_test_times.txt -Output \ No newline at end of file +Output diff --git a/test/filecheck/fibs.asm b/test/filecheck/fibs.asm index 21cd362..6f9648c 100644 --- a/test/filecheck/fibs.asm +++ b/test/filecheck/fibs.asm @@ -1,8 +1,8 @@ -// RUN: python3 -m riscemu -v --ignore-exit-code %s || true | filecheck %s +// RUN: python3 -m riscemu -v -o ignore_exit_code %s | filecheck %s .data fibs: .space 1024 - .text +.text main: addi s1, zero, 0 // storage index addi s2, zero, 1024 // last storage index @@ -15,9 +15,10 @@ loop: addi t1, t2, 0 // t1 = t2 addi s1, s1, 4 // increment storage pointer blt s1, s2, loop // loop as long as we did not reach 
array length + ebreak // exit gracefully add a0, zero, t2 addi a7, zero, 93 - scall // exit with code 0 + scall // exit with code fibs(n) mod 2^32 // CHECK: [CPU] Program exited with code 1265227608 diff --git a/test/filecheck/libc/test-string.s b/test/filecheck/libc/test-string.s index b3cb66f..cc7f928 100644 --- a/test/filecheck/libc/test-string.s +++ b/test/filecheck/libc/test-string.s @@ -1,4 +1,4 @@ -// RUN: python3 -m riscemu -v %s libc/string.s libc/stdlib.s libc/crt0.s | filecheck %s +// RUN: python3 -m riscemu -v %s -o libc | filecheck %s .data