finished basic RISC-V parser

assembly-parser-rework
Anton Lydike 3 years ago
parent dc4dca6fea
commit 0488a9d6bc

@ -2,6 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/test" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="inheritedJdk" />

@ -11,7 +11,7 @@ It contains everything needed to run assembly files, so you don't need any custo
from .exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \
ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException
from .base_types import Executable, LoadedExecutable, LoadedMemorySection
#from .base_types import Executable, LoadedExecutable, LoadedMemorySection
from .instructions import *
@ -22,6 +22,8 @@ from .CPU import CPU
from .config import RunConfig
from .parser import tokenize, parse_tokens, parse_program_from_file
__author__ = "Anton Lydike <Anton@Lydike.com>"
__copyright__ = "Copyright 2021 Anton Lydike"
__version__ = '1.0.0'

@ -1,16 +1,14 @@
from typing import Optional, Tuple, Union
from typing import Optional, Tuple, Union, List
from enum import Enum, auto
from typing import Optional, Tuple, Union
from helpers import parse_numeric_argument
from .base_types import Program, T_RelativeAddress, InstructionContext
from .helpers import parse_numeric_argument, align_addr, int_to_bytes
from .base_types import Program, T_RelativeAddress, InstructionContext, Instruction
from .colors import FMT_PARSE, FMT_NONE
from .exceptions import ParseException
from .helpers import ASSERT_LEN
from .exceptions import ParseException, ASSERT_LEN, ASSERT_NOT_NULL
from .tokenizer import Token
from .types import BinaryDataMemorySection, InstructionMemorySection
INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini')
@ -21,13 +19,25 @@ class MemorySectionType(Enum):
class CurrentSection:
name: str
data: Union[list, bytearray]
data: Union[List[Instruction], bytearray]
type: MemorySectionType
base: int
def __init__(self, name: str, type: MemorySectionType, base: int = 0):
    """
    Create an empty section that is filled while parsing.

    :param name: the section's name
    :param type: decides the backing storage: a bytearray for data sections,
        a list for instruction sections
    :param base: offset that current_address() adds to the section-local position
    :raises ParseException: if type is neither Data nor Instructions
    """
    self.name = name
    self.type = type
    self.base = base

    # pick the container matching the section type
    if type == MemorySectionType.Instructions:
        self.data = list()
        return
    if type == MemorySectionType.Data:
        self.data = bytearray()
        return
    raise ParseException("Unknown section type: {}".format(type))
def current_address(self) -> T_RelativeAddress:
if self.type == MemorySectionType.Data:
return len(self.data)
return len(self.data) * 4
return len(self.data) + self.base
return len(self.data) * 4 + self.base
def __repr__(self):
return "{}(name={},data={},type={})".format(
@ -47,18 +57,27 @@ class ParseContext:
self.section = None
def finalize(self) -> Program:
self.finalize_section()
self._finalize_section()
return self.program
def finalize_section(self):
def _finalize_section(self):
if self.section is None:
return
if self.section.type == MemorySectionType.Data:
section = BinaryDataMemorySection(self.section.data, self.section.name, self.context)
section = BinaryDataMemorySection(self.section.data, self.section.name, self.context, self.program)
self.program.add_section(section)
elif self.section.type == MemorySectionType.Instructions:
section = InstructionMemorySection(self.section.data, self.section.name, self.context)
section = InstructionMemorySection(self.section.data, self.section.name, self.context, self.program)
self.program.add_section(section)
self.section = None
def new_section(self, name: str, type: MemorySectionType):
    """
    Close the section currently being assembled (if any) and open a new one.

    The new section's base is the previous section's current address passed
    through ``align_addr(..., 4)``, or 0 when there is no previous section.

    :param name: name of the section to open
    :param type: whether the section holds instructions or binary data
    """
    base = 0
    if self.section is not None:
        # must be read before finalizing, which resets self.section to None
        base = align_addr(self.section.current_address(), 4)
    self._finalize_section()
    self.section = CurrentSection(name, type, base)
def __repr__(self):
return "{}(\n\tsetion={},\n\tprogram={}\n)".format(
@ -100,21 +119,20 @@ class AssemblerDirectives:
@classmethod
def op_section(cls, token: Token, args: Tuple[str], context: ParseContext):
ASSERT_LEN(args, 1)
context.finalize_section()
if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES:
context.section.type = MemorySectionType.Instructions
context.section.data = list()
context.new_section(args[0], MemorySectionType.Instructions)
else:
context.section.type = MemorySectionType.Data
context.section.data = bytearray()
context.section.name = args[0]
context.new_section(args[0], MemorySectionType.Data)
@classmethod
def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext):
ASSERT_LEN(args, 1)
context.program.global_labels.add(args[0])
@classmethod
def op_global(cls, token: Token, args: Tuple[str], context: ParseContext):
cls.op_globl(token, args, context)
@classmethod
def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext):
ASSERT_LEN(args, 2)
@ -122,6 +140,14 @@ class AssemblerDirectives:
value = parse_numeric_argument(args[1])
context.context.labels[name] = value
@classmethod
def op_space(cls, token: Token, args: Tuple[str], context: ParseContext):
    """
    Handle the space directive: reserve a number of bytes in the current data section.

    :param token: the directive token (not used here)
    :param args: exactly one argument, the numeric byte count
    :param context: the parse context whose current section receives the bytes
    """
    ASSERT_LEN(args, 1)
    ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
    # content=None lets add_bytes generate the filler bytes itself
    cls.add_bytes(parse_numeric_argument(args[0]), None, context)
@classmethod
def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext):
ASSERT_LEN(args, 1)
@ -130,11 +156,14 @@ class AssemblerDirectives:
cls.add_bytes(size, bytearray(size), context)
@classmethod
def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext):
def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext, unsigned=False):
    """
    Append bytes to the data section that is currently being assembled.

    ASSERT_IN_SECTION_TYPE is used to check that the current section is a data section.

    :param size: number of bytes to emit when content is None or an int
    :param content: None to emit ``size`` zero bytes, an int that is encoded into
        ``size`` bytes with int_to_bytes, or a bytearray that is appended as-is
    :param context: the parse context whose current section receives the bytes
    :param unsigned: forwarded to int_to_bytes when content is an int
    """
    ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)

    if content is None:
        content = bytearray(size)
    # The type check has to look at `content`: `context` is a ParseContext and
    # never an int, so integers would stay unencoded and `bytearray += int` fails.
    if isinstance(content, int):
        content = int_to_bytes(content, size, unsigned)

    context.section.data += content
@classmethod
def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True):

@ -7,12 +7,14 @@ This file contains base classes which represent loaded programs
"""
import re
from abc import ABC
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Set
from collections import defaultdict
from .helpers import *
from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE
from .exceptions import ParseException
from .helpers import format_bytes
T_RelativeAddress = int
T_AbsoluteAddress = int

@ -4,8 +4,6 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import typing
from abc import abstractmethod
from .base_types import Instruction
from .colors import *
@ -21,7 +19,7 @@ class RiscemuBaseException(BaseException):
class ParseException(RiscemuBaseException):
def __init__(self, msg, data=None):
super().__init__()
super().__init__(msg, data)
self.msg = msg
self.data = data

@ -6,7 +6,7 @@ SPDX-License-Identifier: MIT
from math import log10, ceil
from .exceptions import *
from typing import Iterable, Iterator, TypeVar, Generic, List
from typing import Iterable, Iterator, TypeVar, Generic, List, Optional
def align_addr(addr: int, to_bytes: int = 8) -> int:
@ -124,7 +124,7 @@ class Peekable(Generic[T], Iterator[T]):
return self.cache.pop()
return next(self.iterable)
def peek(self) -> T:
def peek(self) -> Optional[T]:
try:
if self.cache:
return self.cache[0]

@ -8,8 +8,9 @@ from typing import Tuple, Callable, Dict
from abc import ABC
from ..CPU import CPU
from ..helpers import ASSERT_LEN, ASSERT_IN, to_unsigned
from ..base_types import LoadedInstruction
from ..helpers import to_unsigned
from ..exceptions import ASSERT_LEN, ASSERT_IN
from ..base_types import Instruction
class InstructionSet(ABC):
@ -30,7 +31,7 @@ class InstructionSet(ABC):
self.name = self.__class__.__name__
self.cpu = cpu
def load(self) -> Dict[str, Callable[['LoadedInstruction'], None]]:
def load(self) -> Dict[str, Callable[['Instruction'], None]]:
"""
This is called by the CPU once it instantiates this instruction set
@ -51,7 +52,7 @@ class InstructionSet(ABC):
if member.startswith('instruction_'):
yield member[12:].replace('_', '.'), getattr(self, member)
def parse_mem_ins(self, ins: 'LoadedInstruction') -> Tuple[str, int]:
def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, int]:
"""
parses both rd, rs, imm and rd, imm(rs) argument format and returns (rd, imm+rs1)
(so a register and address tuple for memory instructions)
@ -69,7 +70,7 @@ class InstructionSet(ABC):
rd = ins.get_reg(0)
return rd, rs + imm
def parse_rd_rs_rs(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]:
def parse_rd_rs_rs(self, ins: 'Instruction', signed=True) -> Tuple[str, int, int]:
"""
Assumes the command is in <name> rd, rs1, rs2 format
Returns the name of rd, and the values in rs1 and rs2
@ -84,7 +85,7 @@ class InstructionSet(ABC):
to_unsigned(self.get_reg_content(ins, 1)), \
to_unsigned(self.get_reg_content(ins, 2))
def parse_rd_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]:
def parse_rd_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[str, int, int]:
"""
Assumes the command is in <name> rd, rs, imm format
Returns the name of rd, the value in rs and the immediate imm
@ -99,7 +100,7 @@ class InstructionSet(ABC):
to_unsigned(self.get_reg_content(ins, 1)), \
to_unsigned(ins.get_imm(2))
def parse_rs_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[int, int, int]:
def parse_rs_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[int, int, int]:
"""
Assumes the command is in <name> rs1, rs2, imm format
Returns the values in rs1, rs2 and the immediate imm
@ -113,7 +114,7 @@ class InstructionSet(ABC):
to_unsigned(self.get_reg_content(ins, 1)), \
to_unsigned(ins.get_imm(2))
def get_reg_content(self, ins: 'LoadedInstruction', ind: int) -> int:
def get_reg_content(self, ins: 'Instruction', ind: int) -> int:
"""
get the register name from ins and then return the register contents
"""

@ -1,4 +1,4 @@
from .InstructionSet import InstructionSet, LoadedInstruction
from .InstructionSet import InstructionSet, Instruction
from ..exceptions import INS_NOT_IMPLEMENTED
from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed
@ -10,13 +10,13 @@ class RV32A(InstructionSet):
for this?
"""
def instruction_lr_w(self, ins: 'LoadedInstruction'):
def instruction_lr_w(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_sc_w(self, ins: 'LoadedInstruction'):
def instruction_sc_w(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_amoswap_w(self, ins: 'LoadedInstruction'):
def instruction_amoswap_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
if dest == 'zero':
self.mmu.write(addr, int_to_bytes(addr, 4))
@ -25,37 +25,37 @@ class RV32A(InstructionSet):
self.mmu.write(addr, int_to_bytes(val, 4))
self.regs.set(dest, old)
def instruction_amoadd_w(self, ins: 'LoadedInstruction'):
def instruction_amoadd_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old + val, 4))
self.regs.set(dest, old)
def instruction_amoand_w(self, ins: 'LoadedInstruction'):
def instruction_amoand_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old & val, 4))
self.regs.set(dest, old)
def instruction_amoor_w(self, ins: 'LoadedInstruction'):
def instruction_amoor_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old | val, 4))
self.regs.set(dest, old)
def instruction_amoxor_w(self, ins: 'LoadedInstruction'):
def instruction_amoxor_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old ^ val, 4))
self.regs.set(dest, old)
def instruction_amomax_w(self, ins: 'LoadedInstruction'):
def instruction_amomax_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(max(old, val), 4))
self.regs.set(dest, old)
def instruction_amomaxu_w(self, ins: 'LoadedInstruction'):
def instruction_amomaxu_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
val = to_unsigned(val)
old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True)
@ -63,13 +63,13 @@ class RV32A(InstructionSet):
self.mmu.write(addr, int_to_bytes(to_signed(max(old, val)), 4))
self.regs.set(dest, old)
def instruction_amomin_w(self, ins: 'LoadedInstruction'):
def instruction_amomin_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(min(old, val), 4))
self.regs.set(dest, old)
def instruction_amominu_w(self, ins: 'LoadedInstruction'):
def instruction_amominu_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins)
val = to_unsigned(val)
old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True)

@ -11,7 +11,7 @@ from ..colors import FMT_DEBUG, FMT_NONE
from ..debug import launch_debug_session
from ..exceptions import LaunchDebuggerException
from ..syscall import Syscall
from ..base_types import LoadedInstruction
from ..base_types import Instruction
class RV32I(InstructionSet):
@ -23,39 +23,39 @@ class RV32I(InstructionSet):
See https://maxvytech.com/images/RV32I-11-2018.pdf for a more detailed overview
"""
def instruction_lb(self, ins: 'LoadedInstruction'):
def instruction_lb(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1)))
def instruction_lh(self, ins: 'LoadedInstruction'):
def instruction_lh(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2)))
def instruction_lw(self, ins: 'LoadedInstruction'):
def instruction_lw(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 4)))
def instruction_lbu(self, ins: 'LoadedInstruction'):
def instruction_lbu(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1), unsigned=True))
def instruction_lhu(self, ins: 'LoadedInstruction'):
def instruction_lhu(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2), unsigned=True))
def instruction_sb(self, ins: 'LoadedInstruction'):
def instruction_sb(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.mmu.write(addr, 1, int_to_bytes(self.regs.get(rd), 1))
def instruction_sh(self, ins: 'LoadedInstruction'):
def instruction_sh(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.mmu.write(addr, 2, int_to_bytes(self.regs.get(rd), 2))
def instruction_sw(self, ins: 'LoadedInstruction'):
def instruction_sw(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins)
self.mmu.write(addr, 4, int_to_bytes(self.regs.get(rd), 4))
def instruction_sll(self, ins: 'LoadedInstruction'):
def instruction_sll(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
@ -65,7 +65,7 @@ class RV32I(InstructionSet):
to_signed(to_unsigned(self.regs.get(src1)) << (self.regs.get(src2) & 0b11111))
)
def instruction_slli(self, ins: 'LoadedInstruction'):
def instruction_slli(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
@ -75,7 +75,7 @@ class RV32I(InstructionSet):
to_signed(to_unsigned(self.regs.get(src1)) << (imm & 0b11111))
)
def instruction_srl(self, ins: 'LoadedInstruction'):
def instruction_srl(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
@ -85,7 +85,7 @@ class RV32I(InstructionSet):
to_signed(to_unsigned(self.regs.get(src1)) >> (self.regs.get(src2) & 0b11111))
)
def instruction_srli(self, ins: 'LoadedInstruction'):
def instruction_srli(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
@ -95,7 +95,7 @@ class RV32I(InstructionSet):
to_signed(to_unsigned(self.regs.get(src1)) >> (imm & 0b11111))
)
def instruction_sra(self, ins: 'LoadedInstruction'):
def instruction_sra(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
@ -105,7 +105,7 @@ class RV32I(InstructionSet):
self.regs.get(src1) >> (self.regs.get(src2) & 0b11111)
)
def instruction_srai(self, ins: 'LoadedInstruction'):
def instruction_srai(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0)
src1 = ins.get_reg(1)
@ -115,7 +115,7 @@ class RV32I(InstructionSet):
self.regs.get(src1) >> (imm & 0b11111)
)
def instruction_add(self, ins: 'LoadedInstruction'):
def instruction_add(self, ins: 'Instruction'):
dst = ""
if self.cpu.conf.add_accept_imm:
try:
@ -130,139 +130,139 @@ class RV32I(InstructionSet):
rs1 + rs2
)
def instruction_addi(self, ins: 'LoadedInstruction'):
def instruction_addi(self, ins: 'Instruction'):
dst, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set(
dst,
rs1 + imm
)
def instruction_sub(self, ins: 'LoadedInstruction'):
def instruction_sub(self, ins: 'Instruction'):
dst, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
dst,
rs1 - rs2
)
def instruction_lui(self, ins: 'LoadedInstruction'):
def instruction_lui(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
imm = ins.get_imm(1)
self.regs.set(reg, imm << 12)
def instruction_auipc(self, ins: 'LoadedInstruction'):
def instruction_auipc(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
imm = to_unsigned(ins.get_imm(1))
self.regs.set(reg, self.pc + (imm << 12))
def instruction_xor(self, ins: 'LoadedInstruction'):
def instruction_xor(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 ^ rs2
)
def instruction_xori(self, ins: 'LoadedInstruction'):
def instruction_xori(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set(
rd,
rs1 ^ imm
)
def instruction_or(self, ins: 'LoadedInstruction'):
def instruction_or(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 | rs2
)
def instruction_ori(self, ins: 'LoadedInstruction'):
def instruction_ori(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set(
rd,
rs1 | imm
)
def instruction_and(self, ins: 'LoadedInstruction'):
def instruction_and(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 & rs2
)
def instruction_andi(self, ins: 'LoadedInstruction'):
def instruction_andi(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set(
rd,
rs1 & imm
)
def instruction_slt(self, ins: 'LoadedInstruction'):
def instruction_slt(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
int(rs1 < rs2)
)
def instruction_slti(self, ins: 'LoadedInstruction'):
def instruction_slti(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set(
rd,
int(rs1 < imm)
)
def instruction_sltu(self, ins: 'LoadedInstruction'):
def instruction_sltu(self, ins: 'Instruction'):
dst, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
self.regs.set(
dst,
int(rs1 < rs2)
)
def instruction_sltiu(self, ins: 'LoadedInstruction'):
def instruction_sltiu(self, ins: 'Instruction'):
dst, rs1, imm = self.parse_rd_rs_imm(ins, signed=False)
self.regs.set(
dst,
int(rs1 < imm)
)
def instruction_beq(self, ins: 'LoadedInstruction'):
def instruction_beq(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 == rs2:
self.pc = dst
def instruction_bne(self, ins: 'LoadedInstruction'):
def instruction_bne(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 != rs2:
self.pc = dst
def instruction_blt(self, ins: 'LoadedInstruction'):
def instruction_blt(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 < rs2:
self.pc = dst
def instruction_bge(self, ins: 'LoadedInstruction'):
def instruction_bge(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 >= rs2:
self.pc = dst
def instruction_bltu(self, ins: 'LoadedInstruction'):
def instruction_bltu(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
if rs1 < rs2:
self.pc = dst
def instruction_bgeu(self, ins: 'LoadedInstruction'):
def instruction_bgeu(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
if rs1 >= rs2:
self.pc = dst
# technically deprecated
def instruction_j(self, ins: 'LoadedInstruction'):
def instruction_j(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 1)
addr = ins.get_imm(0)
self.pc = addr
def instruction_jal(self, ins: 'LoadedInstruction'):
def instruction_jal(self, ins: 'Instruction'):
reg = 'ra' # default register is ra
if len(ins.args) == 1:
addr = ins.get_imm(0)
@ -273,29 +273,29 @@ class RV32I(InstructionSet):
self.regs.set(reg, self.pc)
self.pc = addr
def instruction_jalr(self, ins: 'LoadedInstruction'):
def instruction_jalr(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
addr = ins.get_imm(1)
self.regs.set(reg, self.pc)
self.pc = addr
def instruction_ret(self, ins: 'LoadedInstruction'):
def instruction_ret(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0)
self.pc = self.regs.get('ra')
def instruction_ecall(self, ins: 'LoadedInstruction'):
def instruction_ecall(self, ins: 'Instruction'):
self.instruction_scall(ins)
def instruction_ebreak(self, ins: 'LoadedInstruction'):
def instruction_ebreak(self, ins: 'Instruction'):
self.instruction_sbreak(ins)
def instruction_scall(self, ins: 'LoadedInstruction'):
def instruction_scall(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0)
syscall = Syscall(self.regs.get('a7'), self.cpu)
self.cpu.syscall_int.handle_syscall(syscall)
def instruction_sbreak(self, ins: 'LoadedInstruction'):
def instruction_sbreak(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0)
if self.cpu.active_debug:
print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE)
@ -307,23 +307,23 @@ class RV32I(InstructionSet):
"Debug instruction encountered at 0x{:08X}".format(self.pc - 1)
)
def instruction_nop(self, ins: 'LoadedInstruction'):
def instruction_nop(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0)
pass
def instruction_li(self, ins: 'LoadedInstruction'):
def instruction_li(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
immediate = ins.get_imm(1)
self.regs.set(reg, immediate)
def instruction_la(self, ins: 'LoadedInstruction'):
def instruction_la(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0)
immediate = ins.get_imm(1)
self.regs.set(reg, immediate)
def instruction_mv(self, ins: 'LoadedInstruction'):
def instruction_mv(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2)
rd, rs = ins.get_reg(0), ins.get_reg(1)
self.regs.set(rd, self.regs.get(rs))

@ -12,48 +12,48 @@ class RV32M(InstructionSet):
"""
The RV32M Instruction set, containing multiplication and division instructions
"""
def instruction_mul(self, ins: 'LoadedInstruction'):
def instruction_mul(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 * rs2
)
def instruction_mulh(self, ins: 'LoadedInstruction'):
def instruction_mulh(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
(rs1 * rs2) >> 32
)
def instruction_mulhsu(self, ins: 'LoadedInstruction'):
def instruction_mulhsu(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_mulhu(self, ins: 'LoadedInstruction'):
def instruction_mulhu(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins)
def instruction_div(self, ins: 'LoadedInstruction'):
def instruction_div(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 // rs2
)
def instruction_divu(self, ins: 'LoadedInstruction'):
def instruction_divu(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
self.regs.set(
rd,
rs1 // rs2
)
def instruction_rem(self, ins: 'LoadedInstruction'):
def instruction_rem(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set(
rd,
rs1 % rs2
)
def instruction_remu(self, ins: 'LoadedInstruction'):
def instruction_remu(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
self.regs.set(
rd,

@ -3,15 +3,16 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT
"""
import os
import re
from typing import Dict, Tuple, Iterable, Callable
from helpers import Peekable
from .helpers import Peekable
from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
from .base_types import Program
from .colors import FMT_PARSE
from .exceptions import ParseException
from .tokenizer import Token, TokenType
from .tokenizer import Token, TokenType, tokenize
from .types import SimpleInstruction
@ -41,17 +42,32 @@ PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {
def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
    """
    Convert a token stream into a parsed program

    :param name: the program's name
    :param tokens_iter: the program's content, tokenized
    :return: a parsed program
    :raises ParseException: if a token's type has no entry in PARSERS
    """
    context = ParseContext(name)

    for token, args in composite_tokenizer(Peekable[Token](tokens_iter)):
        if token.type not in PARSERS:
            raise ParseException("Unexpected token type: {}, {}".format(token, args))
        # dispatch to the handler registered for this kind of leading token
        PARSERS[token.type](token, args, context)

    return context.finalize()
def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]:
"""
Convert an iterator over tokens into an iterator over tuples: (token, list(token))
The first token is either a pseudo_op, label, or instruction name. The token list contains all remaining tokens before
a newline is encountered
:param tokens_iter: An iterator over tokens
:return: An iterator over a slightly more structured representation of the tokens
"""
tokens: Peekable[Token] = Peekable[Token](tokens_iter)
while not tokens.is_empty():
@ -75,5 +91,10 @@ def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
elif tokens.peek().type == TokenType.NEWLINE:
next(tokens)
break
raise ParseException("Expected newline, instead got {}".format(tokens.peek()))
break
#raise ParseException("Expected newline, instead got {}".format(tokens.peek()))
def parse_program_from_file(path: str) -> Program:
    """
    Read the file at the given path, tokenize it and parse it into a program.

    The program is named after the last component of the path.

    :param path: path of the file to parse
    :return: the parsed program
    """
    with open(path, 'r') as source:
        program_name = os.path.basename(path)
        return parse_tokens(program_name, tokenize(source))

@ -7,7 +7,7 @@ SPDX-License-Identifier: MIT
import re
from dataclasses import dataclass
from enum import Enum, auto
from typing import List, Iterable
from typing import List, Iterable, Optional
from riscemu.decoder import RISCV_REGS
from .exceptions import ParseException
@ -17,8 +17,6 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$')
REGISTER_NAMES = RISCV_REGS
I = lambda x: x
class TokenType(Enum):
COMMA = auto()
@ -39,7 +37,7 @@ class Token:
return '\\n'
if self.type == TokenType.COMMA:
return ', '
return '{}({}) '.format(self.type.name[0:3], self.value)
return '{}({})'.format(self.type.name[0:3], self.value)
NEWLINE = Token(TokenType.NEWLINE, '\n')
@ -55,7 +53,7 @@ def tokenize(input: Iterable[str]) -> Iterable[Token]:
if not line:
continue
parts = list(part for part in re.split(WHITESPACE_PATTERN, line) if part)
parts = list(part for part in split_whitespace_respecting_quotes(line) if part)
yield from parse_line(parts)
yield NEWLINE
@ -70,6 +68,8 @@ def parse_line(parts: List[str]) -> Iterable[Token]:
yield Token(TokenType.PSEUDO_OP, first_token)
elif first_token[-1] == ':':
yield Token(TokenType.LABEL, first_token)
yield from parse_line(parts[1:])
return
else:
yield Token(TokenType.INSTRUCTION_NAME, first_token)
@ -100,3 +100,40 @@ def print_tokens(tokens: Iterable[Token]):
for token in tokens:
print(token, end='\n' if token == NEWLINE else '')
print("", flush=True, end="")
def split_whitespace_respecting_quotes(line: str) -> Iterable[str]:
    """
    Split a line into parts at whitespace, keeping quoted text together.

    Text between a pair of matching single or double quotes is yielded as one
    part without the quotes, even if it contains whitespace or is empty. A quote
    also ends the unquoted part directly in front of it. Outside of quotes, any
    character for which ``str.isspace()`` is true separates parts, and runs of
    such characters never produce empty parts. If a quote is never closed, the
    text collected after it is yielded at the end of the line (if non-empty).

    :param line: the text to split
    :return: an iterator over the parts, in order of appearance
    """
    quote = ""  # the quote character we are currently inside of, "" if none
    part = ""
    for c in line:
        if c == quote:
            # closing quote: emit the quoted text, even when it is empty
            yield part
            part = ""
            quote = ""
        elif quote:
            # inside quotes every character is taken literally
            part += c
        elif c in "\"'":
            # opening quote also terminates any unquoted part before it
            if part:
                yield part
            quote = c
            part = ""
        elif c.isspace():
            # any whitespace (including \r, \f, \v) separates parts
            if part:
                yield part
            part = ""
        else:
            part += c

    if part:
        yield part

@ -2,7 +2,7 @@ from typing import List, Tuple
from .exceptions import MemoryAccessException
from .helpers import parse_numeric_argument
from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \
T_AbsoluteAddress
T_AbsoluteAddress, Program
class SimpleInstruction(Instruction):
@ -26,13 +26,14 @@ class SimpleInstruction(Instruction):
class InstructionMemorySection(MemorySection):
def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, base: int = 0):
def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: Program, base: int = 0):
self.name = name
self.base = base
self.context = context
self.size = len(instructions) * 4
self.flags = MemoryFlags(True, True)
self.instructions = instructions
self.owner = owner.name
def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')
@ -47,13 +48,14 @@ class InstructionMemorySection(MemorySection):
class BinaryDataMemorySection(MemorySection):
def __init__(self, data: bytearray, name: str, context: InstructionContext, base: int = 0):
def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: Program, base: int = 0):
self.name = name
self.base = base
self.context = context
self.size = len(data)
self.flags = MemoryFlags(False, False)
self.data = data
self.owner = owner.name
def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
if offset + size > self.size:

@ -0,0 +1,2 @@
from .test_tokenizer import *
from .test_helpers import *

@ -3,7 +3,7 @@ from unittest import TestCase
from riscemu.helpers import *
class Test(TestCase):
class TestHelpers(TestCase):
def test_int_to_bytes(self):
self.assertEqual(int_to_bytes(-1), bytearray([0xff] * 4), "-1")
self.assertEqual(int_to_bytes(1), bytearray([0, 0, 0, 1]), "1")

@ -1,6 +1,7 @@
from unittest import TestCase
from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA
from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA, \
split_whitespace_respecting_quotes
def ins(name: str) -> Token:
@ -19,7 +20,7 @@ def lbl(name: str) -> Token:
return Token(TokenType.LABEL, name)
class Test(TestCase):
class TestTokenizer(TestCase):
def test_instructions(self):
program = [
@ -79,3 +80,47 @@ section:
self.assertEqual(list(tokenize(program.splitlines())), tokens)
def test_split_whitespace_respecting_quotes_single(self):
self.assertEqual(
list(split_whitespace_respecting_quotes("test")), ["test"]
)
def test_split_whitespace_respecting_quotes_empty(self):
self.assertEqual(
list(split_whitespace_respecting_quotes("")), []
)
def test_split_whitespace_respecting_quotes_two_parts(self):
self.assertEqual(
list(split_whitespace_respecting_quotes("test 123")), ["test", "123"]
)
def test_split_whitespace_respecting_quotes_whole_quoted(self):
self.assertEqual(
list(split_whitespace_respecting_quotes("'test 123'")), ["test 123"]
)
def test_split_whitespace_respecting_quotes_double_quotes(self):
self.assertEqual(
list(split_whitespace_respecting_quotes('"test 123"')), ["test 123"]
)
def test_split_whitespace_respecting_quotes_quoted_then_normal(self):
self.assertEqual(
list(split_whitespace_respecting_quotes('"test 123" abc')), ["test 123", "abc"]
)
def test_split_whitespace_respecting_quotes_quoted_sorrounded(self):
self.assertEqual(
list(split_whitespace_respecting_quotes('hello "test 123" abc')), ["hello", "test 123", "abc"]
)
def test_split_whitespace_respecting_quotes_weird_spaces(self):
self.assertEqual(
list(split_whitespace_respecting_quotes('hello "test 123"\tabc')), ["hello", "test 123", "abc"]
)
def test_split_whitespace_respecting_quotes_quotes_no_spaces(self):
self.assertEqual(
list(split_whitespace_respecting_quotes('hello"test 123"abc')), ["hello", "test 123", "abc"]
)

Loading…
Cancel
Save