finished basic RISC-V parser

assembly-parser-rework
Anton Lydike 3 years ago
parent dc4dca6fea
commit 0488a9d6bc

@ -2,6 +2,7 @@
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$"> <content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/test" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/venv" /> <excludeFolder url="file://$MODULE_DIR$/venv" />
</content> </content>
<orderEntry type="inheritedJdk" /> <orderEntry type="inheritedJdk" />

@ -11,7 +11,7 @@ It contains everything needed to run assembly files, so you don't need any custo
from .exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \ from .exceptions import RiscemuBaseException, LaunchDebuggerException, InvalidSyscallException, LinkerException, \
ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException ParseException, NumberFormatException, InvalidRegisterException, MemoryAccessException, OutOfMemoryException
from .base_types import Executable, LoadedExecutable, LoadedMemorySection #from .base_types import Executable, LoadedExecutable, LoadedMemorySection
from .instructions import * from .instructions import *
@ -22,6 +22,8 @@ from .CPU import CPU
from .config import RunConfig from .config import RunConfig
from .parser import tokenize, parse_tokens, parse_program_from_file
__author__ = "Anton Lydike <Anton@Lydike.com>" __author__ = "Anton Lydike <Anton@Lydike.com>"
__copyright__ = "Copyright 2021 Anton Lydike" __copyright__ = "Copyright 2021 Anton Lydike"
__version__ = '1.0.0' __version__ = '1.0.0'

@ -1,16 +1,14 @@
from typing import Optional, Tuple, Union from typing import Optional, Tuple, Union, List
from enum import Enum, auto from enum import Enum, auto
from typing import Optional, Tuple, Union from typing import Optional, Tuple, Union
from helpers import parse_numeric_argument from .helpers import parse_numeric_argument, align_addr, int_to_bytes
from .base_types import Program, T_RelativeAddress, InstructionContext from .base_types import Program, T_RelativeAddress, InstructionContext, Instruction
from .colors import FMT_PARSE, FMT_NONE from .colors import FMT_PARSE, FMT_NONE
from .exceptions import ParseException from .exceptions import ParseException, ASSERT_LEN, ASSERT_NOT_NULL
from .helpers import ASSERT_LEN
from .tokenizer import Token from .tokenizer import Token
from .types import BinaryDataMemorySection, InstructionMemorySection from .types import BinaryDataMemorySection, InstructionMemorySection
INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini') INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini')
@ -21,13 +19,25 @@ class MemorySectionType(Enum):
class CurrentSection: class CurrentSection:
name: str name: str
data: Union[list, bytearray] data: Union[List[Instruction], bytearray]
type: MemorySectionType type: MemorySectionType
base: int
def __init__(self, name: str, type: MemorySectionType, base: int = 0):
self.name = name
self.type = type
self.base = base
if self.type == MemorySectionType.Data:
self.data = bytearray()
elif self.type == MemorySectionType.Instructions:
self.data = list()
else:
raise ParseException("Unknown section type: {}".format(type))
def current_address(self) -> T_RelativeAddress: def current_address(self) -> T_RelativeAddress:
if self.type == MemorySectionType.Data: if self.type == MemorySectionType.Data:
return len(self.data) return len(self.data) + self.base
return len(self.data) * 4 return len(self.data) * 4 + self.base
def __repr__(self): def __repr__(self):
return "{}(name={},data={},type={})".format( return "{}(name={},data={},type={})".format(
@ -47,18 +57,27 @@ class ParseContext:
self.section = None self.section = None
def finalize(self) -> Program: def finalize(self) -> Program:
self.finalize_section() self._finalize_section()
return self.program return self.program
def finalize_section(self): def _finalize_section(self):
if self.section is None: if self.section is None:
return return
if self.section.type == MemorySectionType.Data: if self.section.type == MemorySectionType.Data:
section = BinaryDataMemorySection(self.section.data, self.section.name, self.context) section = BinaryDataMemorySection(self.section.data, self.section.name, self.context, self.program)
self.program.add_section(section) self.program.add_section(section)
elif self.section.type == MemorySectionType.Instructions: elif self.section.type == MemorySectionType.Instructions:
section = InstructionMemorySection(self.section.data, self.section.name, self.context) section = InstructionMemorySection(self.section.data, self.section.name, self.context, self.program)
self.program.add_section(section) self.program.add_section(section)
self.section = None
def new_section(self, name: str, type: MemorySectionType):
base = 0
if self.section is not None:
base = align_addr(self.section.current_address(), 4)
print("base at {}".format(base))
self._finalize_section()
self.section = CurrentSection(name, type, base)
def __repr__(self): def __repr__(self):
return "{}(\n\tsetion={},\n\tprogram={}\n)".format( return "{}(\n\tsetion={},\n\tprogram={}\n)".format(
@ -100,21 +119,20 @@ class AssemblerDirectives:
@classmethod @classmethod
def op_section(cls, token: Token, args: Tuple[str], context: ParseContext): def op_section(cls, token: Token, args: Tuple[str], context: ParseContext):
ASSERT_LEN(args, 1) ASSERT_LEN(args, 1)
context.finalize_section()
if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES: if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES:
context.section.type = MemorySectionType.Instructions context.new_section(args[0], MemorySectionType.Instructions)
context.section.data = list()
else: else:
context.section.type = MemorySectionType.Data context.new_section(args[0], MemorySectionType.Data)
context.section.data = bytearray()
context.section.name = args[0]
@classmethod @classmethod
def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext): def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext):
ASSERT_LEN(args, 1) ASSERT_LEN(args, 1)
context.program.global_labels.add(args[0]) context.program.global_labels.add(args[0])
@classmethod
def op_global(cls, token: Token, args: Tuple[str], context: ParseContext):
cls.op_globl(token, args, context)
@classmethod @classmethod
def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext): def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext):
ASSERT_LEN(args, 2) ASSERT_LEN(args, 2)
@ -122,6 +140,14 @@ class AssemblerDirectives:
value = parse_numeric_argument(args[1]) value = parse_numeric_argument(args[1])
context.context.labels[name] = value context.context.labels[name] = value
@classmethod
def op_space(cls, token: Token, args: Tuple[str], context: ParseContext):
ASSERT_LEN(args, 1)
ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
size = parse_numeric_argument(args[0])
cls.add_bytes(size, None, context)
@classmethod @classmethod
def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext): def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext):
ASSERT_LEN(args, 1) ASSERT_LEN(args, 1)
@ -130,11 +156,14 @@ class AssemblerDirectives:
cls.add_bytes(size, bytearray(size), context) cls.add_bytes(size, bytearray(size), context)
@classmethod @classmethod
def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext): def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext, unsigned=False):
ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data) ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
if content is None: if content is None:
content = bytearray(size) content = bytearray(size)
if isinstance(context, int):
content = int_to_bytes(content, size, unsigned)
context.section.data += content
@classmethod @classmethod
def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True): def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True):

@ -7,12 +7,14 @@ This file contains base classes which represent loaded programs
""" """
import re import re
from abc import ABC from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Set from typing import Dict, List, Optional, Tuple, Set
from collections import defaultdict
from .helpers import * from .colors import FMT_MEM, FMT_NONE, FMT_UNDERLINE, FMT_ORANGE
from .exceptions import ParseException
from .helpers import format_bytes
T_RelativeAddress = int T_RelativeAddress = int
T_AbsoluteAddress = int T_AbsoluteAddress = int

@ -4,8 +4,6 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT SPDX-License-Identifier: MIT
""" """
import typing
from abc import abstractmethod from abc import abstractmethod
from .base_types import Instruction from .base_types import Instruction
from .colors import * from .colors import *
@ -21,7 +19,7 @@ class RiscemuBaseException(BaseException):
class ParseException(RiscemuBaseException): class ParseException(RiscemuBaseException):
def __init__(self, msg, data=None): def __init__(self, msg, data=None):
super().__init__() super().__init__(msg, data)
self.msg = msg self.msg = msg
self.data = data self.data = data

@ -6,7 +6,7 @@ SPDX-License-Identifier: MIT
from math import log10, ceil from math import log10, ceil
from .exceptions import * from .exceptions import *
from typing import Iterable, Iterator, TypeVar, Generic, List from typing import Iterable, Iterator, TypeVar, Generic, List, Optional
def align_addr(addr: int, to_bytes: int = 8) -> int: def align_addr(addr: int, to_bytes: int = 8) -> int:
@ -124,7 +124,7 @@ class Peekable(Generic[T], Iterator[T]):
return self.cache.pop() return self.cache.pop()
return next(self.iterable) return next(self.iterable)
def peek(self) -> T: def peek(self) -> Optional[T]:
try: try:
if self.cache: if self.cache:
return self.cache[0] return self.cache[0]

@ -8,8 +8,9 @@ from typing import Tuple, Callable, Dict
from abc import ABC from abc import ABC
from ..CPU import CPU from ..CPU import CPU
from ..helpers import ASSERT_LEN, ASSERT_IN, to_unsigned from ..helpers import to_unsigned
from ..base_types import LoadedInstruction from ..exceptions import ASSERT_LEN, ASSERT_IN
from ..base_types import Instruction
class InstructionSet(ABC): class InstructionSet(ABC):
@ -30,7 +31,7 @@ class InstructionSet(ABC):
self.name = self.__class__.__name__ self.name = self.__class__.__name__
self.cpu = cpu self.cpu = cpu
def load(self) -> Dict[str, Callable[['LoadedInstruction'], None]]: def load(self) -> Dict[str, Callable[['Instruction'], None]]:
""" """
This is called by the CPU once it instantiates this instruction set This is called by the CPU once it instantiates this instruction set
@ -51,7 +52,7 @@ class InstructionSet(ABC):
if member.startswith('instruction_'): if member.startswith('instruction_'):
yield member[12:].replace('_', '.'), getattr(self, member) yield member[12:].replace('_', '.'), getattr(self, member)
def parse_mem_ins(self, ins: 'LoadedInstruction') -> Tuple[str, int]: def parse_mem_ins(self, ins: 'Instruction') -> Tuple[str, int]:
""" """
parses both rd, rs, imm and rd, imm(rs) argument format and returns (rd, imm+rs1) parses both rd, rs, imm and rd, imm(rs) argument format and returns (rd, imm+rs1)
(so a register and address tuple for memory instructions) (so a register and address tuple for memory instructions)
@ -69,7 +70,7 @@ class InstructionSet(ABC):
rd = ins.get_reg(0) rd = ins.get_reg(0)
return rd, rs + imm return rd, rs + imm
def parse_rd_rs_rs(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]: def parse_rd_rs_rs(self, ins: 'Instruction', signed=True) -> Tuple[str, int, int]:
""" """
Assumes the command is in <name> rd, rs1, rs2 format Assumes the command is in <name> rd, rs1, rs2 format
Returns the name of rd, and the values in rs1 and rs2 Returns the name of rd, and the values in rs1 and rs2
@ -84,7 +85,7 @@ class InstructionSet(ABC):
to_unsigned(self.get_reg_content(ins, 1)), \ to_unsigned(self.get_reg_content(ins, 1)), \
to_unsigned(self.get_reg_content(ins, 2)) to_unsigned(self.get_reg_content(ins, 2))
def parse_rd_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[str, int, int]: def parse_rd_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[str, int, int]:
""" """
Assumes the command is in <name> rd, rs, imm format Assumes the command is in <name> rd, rs, imm format
Returns the name of rd, the value in rs and the immediate imm Returns the name of rd, the value in rs and the immediate imm
@ -99,7 +100,7 @@ class InstructionSet(ABC):
to_unsigned(self.get_reg_content(ins, 1)), \ to_unsigned(self.get_reg_content(ins, 1)), \
to_unsigned(ins.get_imm(2)) to_unsigned(ins.get_imm(2))
def parse_rs_rs_imm(self, ins: 'LoadedInstruction', signed=True) -> Tuple[int, int, int]: def parse_rs_rs_imm(self, ins: 'Instruction', signed=True) -> Tuple[int, int, int]:
""" """
Assumes the command is in <name> rs1, rs2, imm format Assumes the command is in <name> rs1, rs2, imm format
Returns the values in rs1, rs2 and the immediate imm Returns the values in rs1, rs2 and the immediate imm
@ -113,7 +114,7 @@ class InstructionSet(ABC):
to_unsigned(self.get_reg_content(ins, 1)), \ to_unsigned(self.get_reg_content(ins, 1)), \
to_unsigned(ins.get_imm(2)) to_unsigned(ins.get_imm(2))
def get_reg_content(self, ins: 'LoadedInstruction', ind: int) -> int: def get_reg_content(self, ins: 'Instruction', ind: int) -> int:
""" """
get the register name from ins and then return the register contents get the register name from ins and then return the register contents
""" """

@ -1,4 +1,4 @@
from .InstructionSet import InstructionSet, LoadedInstruction from .InstructionSet import InstructionSet, Instruction
from ..exceptions import INS_NOT_IMPLEMENTED from ..exceptions import INS_NOT_IMPLEMENTED
from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed from ..helpers import int_from_bytes, int_to_bytes, to_unsigned, to_signed
@ -10,13 +10,13 @@ class RV32A(InstructionSet):
for this? for this?
""" """
def instruction_lr_w(self, ins: 'LoadedInstruction'): def instruction_lr_w(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins) INS_NOT_IMPLEMENTED(ins)
def instruction_sc_w(self, ins: 'LoadedInstruction'): def instruction_sc_w(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins) INS_NOT_IMPLEMENTED(ins)
def instruction_amoswap_w(self, ins: 'LoadedInstruction'): def instruction_amoswap_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins) dest, addr, val = self.parse_rd_rs_rs(ins)
if dest == 'zero': if dest == 'zero':
self.mmu.write(addr, int_to_bytes(addr, 4)) self.mmu.write(addr, int_to_bytes(addr, 4))
@ -25,37 +25,37 @@ class RV32A(InstructionSet):
self.mmu.write(addr, int_to_bytes(val, 4)) self.mmu.write(addr, int_to_bytes(val, 4))
self.regs.set(dest, old) self.regs.set(dest, old)
def instruction_amoadd_w(self, ins: 'LoadedInstruction'): def instruction_amoadd_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins) dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4)) old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old + val, 4)) self.mmu.write(addr, int_to_bytes(old + val, 4))
self.regs.set(dest, old) self.regs.set(dest, old)
def instruction_amoand_w(self, ins: 'LoadedInstruction'): def instruction_amoand_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins) dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4)) old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old & val, 4)) self.mmu.write(addr, int_to_bytes(old & val, 4))
self.regs.set(dest, old) self.regs.set(dest, old)
def instruction_amoor_w(self, ins: 'LoadedInstruction'): def instruction_amoor_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins) dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4)) old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old | val, 4)) self.mmu.write(addr, int_to_bytes(old | val, 4))
self.regs.set(dest, old) self.regs.set(dest, old)
def instruction_amoxor_w(self, ins: 'LoadedInstruction'): def instruction_amoxor_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins) dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4)) old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(old ^ val, 4)) self.mmu.write(addr, int_to_bytes(old ^ val, 4))
self.regs.set(dest, old) self.regs.set(dest, old)
def instruction_amomax_w(self, ins: 'LoadedInstruction'): def instruction_amomax_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins) dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4)) old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(max(old, val), 4)) self.mmu.write(addr, int_to_bytes(max(old, val), 4))
self.regs.set(dest, old) self.regs.set(dest, old)
def instruction_amomaxu_w(self, ins: 'LoadedInstruction'): def instruction_amomaxu_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins) dest, addr, val = self.parse_rd_rs_rs(ins)
val = to_unsigned(val) val = to_unsigned(val)
old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True) old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True)
@ -63,13 +63,13 @@ class RV32A(InstructionSet):
self.mmu.write(addr, int_to_bytes(to_signed(max(old, val)), 4)) self.mmu.write(addr, int_to_bytes(to_signed(max(old, val)), 4))
self.regs.set(dest, old) self.regs.set(dest, old)
def instruction_amomin_w(self, ins: 'LoadedInstruction'): def instruction_amomin_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins) dest, addr, val = self.parse_rd_rs_rs(ins)
old = int_from_bytes(self.mmu.read(addr, 4)) old = int_from_bytes(self.mmu.read(addr, 4))
self.mmu.write(addr, int_to_bytes(min(old, val), 4)) self.mmu.write(addr, int_to_bytes(min(old, val), 4))
self.regs.set(dest, old) self.regs.set(dest, old)
def instruction_amominu_w(self, ins: 'LoadedInstruction'): def instruction_amominu_w(self, ins: 'Instruction'):
dest, addr, val = self.parse_rd_rs_rs(ins) dest, addr, val = self.parse_rd_rs_rs(ins)
val = to_unsigned(val) val = to_unsigned(val)
old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True) old = int_from_bytes(self.mmu.read(addr, 4), unsigned=True)

@ -11,7 +11,7 @@ from ..colors import FMT_DEBUG, FMT_NONE
from ..debug import launch_debug_session from ..debug import launch_debug_session
from ..exceptions import LaunchDebuggerException from ..exceptions import LaunchDebuggerException
from ..syscall import Syscall from ..syscall import Syscall
from ..base_types import LoadedInstruction from ..base_types import Instruction
class RV32I(InstructionSet): class RV32I(InstructionSet):
@ -23,39 +23,39 @@ class RV32I(InstructionSet):
See https://maxvytech.com/images/RV32I-11-2018.pdf for a more detailed overview See https://maxvytech.com/images/RV32I-11-2018.pdf for a more detailed overview
""" """
def instruction_lb(self, ins: 'LoadedInstruction'): def instruction_lb(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins) rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1))) self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1)))
def instruction_lh(self, ins: 'LoadedInstruction'): def instruction_lh(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins) rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2))) self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2)))
def instruction_lw(self, ins: 'LoadedInstruction'): def instruction_lw(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins) rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 4))) self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 4)))
def instruction_lbu(self, ins: 'LoadedInstruction'): def instruction_lbu(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins) rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1), unsigned=True)) self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 1), unsigned=True))
def instruction_lhu(self, ins: 'LoadedInstruction'): def instruction_lhu(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins) rd, addr = self.parse_mem_ins(ins)
self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2), unsigned=True)) self.regs.set(rd, int_from_bytes(self.mmu.read(addr, 2), unsigned=True))
def instruction_sb(self, ins: 'LoadedInstruction'): def instruction_sb(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins) rd, addr = self.parse_mem_ins(ins)
self.mmu.write(addr, 1, int_to_bytes(self.regs.get(rd), 1)) self.mmu.write(addr, 1, int_to_bytes(self.regs.get(rd), 1))
def instruction_sh(self, ins: 'LoadedInstruction'): def instruction_sh(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins) rd, addr = self.parse_mem_ins(ins)
self.mmu.write(addr, 2, int_to_bytes(self.regs.get(rd), 2)) self.mmu.write(addr, 2, int_to_bytes(self.regs.get(rd), 2))
def instruction_sw(self, ins: 'LoadedInstruction'): def instruction_sw(self, ins: 'Instruction'):
rd, addr = self.parse_mem_ins(ins) rd, addr = self.parse_mem_ins(ins)
self.mmu.write(addr, 4, int_to_bytes(self.regs.get(rd), 4)) self.mmu.write(addr, 4, int_to_bytes(self.regs.get(rd), 4))
def instruction_sll(self, ins: 'LoadedInstruction'): def instruction_sll(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3) ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0) dst = ins.get_reg(0)
src1 = ins.get_reg(1) src1 = ins.get_reg(1)
@ -65,7 +65,7 @@ class RV32I(InstructionSet):
to_signed(to_unsigned(self.regs.get(src1)) << (self.regs.get(src2) & 0b11111)) to_signed(to_unsigned(self.regs.get(src1)) << (self.regs.get(src2) & 0b11111))
) )
def instruction_slli(self, ins: 'LoadedInstruction'): def instruction_slli(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3) ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0) dst = ins.get_reg(0)
src1 = ins.get_reg(1) src1 = ins.get_reg(1)
@ -75,7 +75,7 @@ class RV32I(InstructionSet):
to_signed(to_unsigned(self.regs.get(src1)) << (imm & 0b11111)) to_signed(to_unsigned(self.regs.get(src1)) << (imm & 0b11111))
) )
def instruction_srl(self, ins: 'LoadedInstruction'): def instruction_srl(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3) ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0) dst = ins.get_reg(0)
src1 = ins.get_reg(1) src1 = ins.get_reg(1)
@ -85,7 +85,7 @@ class RV32I(InstructionSet):
to_signed(to_unsigned(self.regs.get(src1)) >> (self.regs.get(src2) & 0b11111)) to_signed(to_unsigned(self.regs.get(src1)) >> (self.regs.get(src2) & 0b11111))
) )
def instruction_srli(self, ins: 'LoadedInstruction'): def instruction_srli(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3) ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0) dst = ins.get_reg(0)
src1 = ins.get_reg(1) src1 = ins.get_reg(1)
@ -95,7 +95,7 @@ class RV32I(InstructionSet):
to_signed(to_unsigned(self.regs.get(src1)) >> (imm & 0b11111)) to_signed(to_unsigned(self.regs.get(src1)) >> (imm & 0b11111))
) )
def instruction_sra(self, ins: 'LoadedInstruction'): def instruction_sra(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3) ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0) dst = ins.get_reg(0)
src1 = ins.get_reg(1) src1 = ins.get_reg(1)
@ -105,7 +105,7 @@ class RV32I(InstructionSet):
self.regs.get(src1) >> (self.regs.get(src2) & 0b11111) self.regs.get(src1) >> (self.regs.get(src2) & 0b11111)
) )
def instruction_srai(self, ins: 'LoadedInstruction'): def instruction_srai(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 3) ASSERT_LEN(ins.args, 3)
dst = ins.get_reg(0) dst = ins.get_reg(0)
src1 = ins.get_reg(1) src1 = ins.get_reg(1)
@ -115,7 +115,7 @@ class RV32I(InstructionSet):
self.regs.get(src1) >> (imm & 0b11111) self.regs.get(src1) >> (imm & 0b11111)
) )
def instruction_add(self, ins: 'LoadedInstruction'): def instruction_add(self, ins: 'Instruction'):
dst = "" dst = ""
if self.cpu.conf.add_accept_imm: if self.cpu.conf.add_accept_imm:
try: try:
@ -130,139 +130,139 @@ class RV32I(InstructionSet):
rs1 + rs2 rs1 + rs2
) )
def instruction_addi(self, ins: 'LoadedInstruction'): def instruction_addi(self, ins: 'Instruction'):
dst, rs1, imm = self.parse_rd_rs_imm(ins) dst, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set( self.regs.set(
dst, dst,
rs1 + imm rs1 + imm
) )
def instruction_sub(self, ins: 'LoadedInstruction'): def instruction_sub(self, ins: 'Instruction'):
dst, rs1, rs2 = self.parse_rd_rs_rs(ins) dst, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set( self.regs.set(
dst, dst,
rs1 - rs2 rs1 - rs2
) )
def instruction_lui(self, ins: 'LoadedInstruction'): def instruction_lui(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2) ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0) reg = ins.get_reg(0)
imm = ins.get_imm(1) imm = ins.get_imm(1)
self.regs.set(reg, imm << 12) self.regs.set(reg, imm << 12)
def instruction_auipc(self, ins: 'LoadedInstruction'): def instruction_auipc(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2) ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0) reg = ins.get_reg(0)
imm = to_unsigned(ins.get_imm(1)) imm = to_unsigned(ins.get_imm(1))
self.regs.set(reg, self.pc + (imm << 12)) self.regs.set(reg, self.pc + (imm << 12))
def instruction_xor(self, ins: 'LoadedInstruction'): def instruction_xor(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins) rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set( self.regs.set(
rd, rd,
rs1 ^ rs2 rs1 ^ rs2
) )
def instruction_xori(self, ins: 'LoadedInstruction'): def instruction_xori(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins) rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set( self.regs.set(
rd, rd,
rs1 ^ imm rs1 ^ imm
) )
def instruction_or(self, ins: 'LoadedInstruction'): def instruction_or(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins) rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set( self.regs.set(
rd, rd,
rs1 | rs2 rs1 | rs2
) )
def instruction_ori(self, ins: 'LoadedInstruction'): def instruction_ori(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins) rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set( self.regs.set(
rd, rd,
rs1 | imm rs1 | imm
) )
def instruction_and(self, ins: 'LoadedInstruction'): def instruction_and(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins) rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set( self.regs.set(
rd, rd,
rs1 & rs2 rs1 & rs2
) )
def instruction_andi(self, ins: 'LoadedInstruction'): def instruction_andi(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins) rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set( self.regs.set(
rd, rd,
rs1 & imm rs1 & imm
) )
def instruction_slt(self, ins: 'LoadedInstruction'): def instruction_slt(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins) rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set( self.regs.set(
rd, rd,
int(rs1 < rs2) int(rs1 < rs2)
) )
def instruction_slti(self, ins: 'LoadedInstruction'): def instruction_slti(self, ins: 'Instruction'):
rd, rs1, imm = self.parse_rd_rs_imm(ins) rd, rs1, imm = self.parse_rd_rs_imm(ins)
self.regs.set( self.regs.set(
rd, rd,
int(rs1 < imm) int(rs1 < imm)
) )
def instruction_sltu(self, ins: 'LoadedInstruction'): def instruction_sltu(self, ins: 'Instruction'):
dst, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False) dst, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
self.regs.set( self.regs.set(
dst, dst,
int(rs1 < rs2) int(rs1 < rs2)
) )
def instruction_sltiu(self, ins: 'LoadedInstruction'): def instruction_sltiu(self, ins: 'Instruction'):
dst, rs1, imm = self.parse_rd_rs_imm(ins, signed=False) dst, rs1, imm = self.parse_rd_rs_imm(ins, signed=False)
self.regs.set( self.regs.set(
dst, dst,
int(rs1 < imm) int(rs1 < imm)
) )
def instruction_beq(self, ins: 'LoadedInstruction'): def instruction_beq(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins) rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 == rs2: if rs1 == rs2:
self.pc = dst self.pc = dst
def instruction_bne(self, ins: 'LoadedInstruction'): def instruction_bne(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins) rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 != rs2: if rs1 != rs2:
self.pc = dst self.pc = dst
def instruction_blt(self, ins: 'LoadedInstruction'): def instruction_blt(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins) rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 < rs2: if rs1 < rs2:
self.pc = dst self.pc = dst
def instruction_bge(self, ins: 'LoadedInstruction'): def instruction_bge(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins) rs1, rs2, dst = self.parse_rs_rs_imm(ins)
if rs1 >= rs2: if rs1 >= rs2:
self.pc = dst self.pc = dst
def instruction_bltu(self, ins: 'LoadedInstruction'): def instruction_bltu(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False) rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
if rs1 < rs2: if rs1 < rs2:
self.pc = dst self.pc = dst
def instruction_bgeu(self, ins: 'LoadedInstruction'): def instruction_bgeu(self, ins: 'Instruction'):
rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False) rs1, rs2, dst = self.parse_rs_rs_imm(ins, signed=False)
if rs1 >= rs2: if rs1 >= rs2:
self.pc = dst self.pc = dst
# technically deprecated # technically deprecated
def instruction_j(self, ins: 'LoadedInstruction'): def instruction_j(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 1) ASSERT_LEN(ins.args, 1)
addr = ins.get_imm(0) addr = ins.get_imm(0)
self.pc = addr self.pc = addr
def instruction_jal(self, ins: 'LoadedInstruction'): def instruction_jal(self, ins: 'Instruction'):
reg = 'ra' # default register is ra reg = 'ra' # default register is ra
if len(ins.args) == 1: if len(ins.args) == 1:
addr = ins.get_imm(0) addr = ins.get_imm(0)
@ -273,29 +273,29 @@ class RV32I(InstructionSet):
self.regs.set(reg, self.pc) self.regs.set(reg, self.pc)
self.pc = addr self.pc = addr
def instruction_jalr(self, ins: 'LoadedInstruction'): def instruction_jalr(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2) ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0) reg = ins.get_reg(0)
addr = ins.get_imm(1) addr = ins.get_imm(1)
self.regs.set(reg, self.pc) self.regs.set(reg, self.pc)
self.pc = addr self.pc = addr
def instruction_ret(self, ins: 'LoadedInstruction'): def instruction_ret(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0) ASSERT_LEN(ins.args, 0)
self.pc = self.regs.get('ra') self.pc = self.regs.get('ra')
def instruction_ecall(self, ins: 'LoadedInstruction'): def instruction_ecall(self, ins: 'Instruction'):
self.instruction_scall(ins) self.instruction_scall(ins)
def instruction_ebreak(self, ins: 'LoadedInstruction'): def instruction_ebreak(self, ins: 'Instruction'):
self.instruction_sbreak(ins) self.instruction_sbreak(ins)
def instruction_scall(self, ins: 'LoadedInstruction'): def instruction_scall(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0) ASSERT_LEN(ins.args, 0)
syscall = Syscall(self.regs.get('a7'), self.cpu) syscall = Syscall(self.regs.get('a7'), self.cpu)
self.cpu.syscall_int.handle_syscall(syscall) self.cpu.syscall_int.handle_syscall(syscall)
def instruction_sbreak(self, ins: 'LoadedInstruction'): def instruction_sbreak(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0) ASSERT_LEN(ins.args, 0)
if self.cpu.active_debug: if self.cpu.active_debug:
print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE) print(FMT_DEBUG + "Debug instruction encountered at 0x{:08X}".format(self.pc - 1) + FMT_NONE)
@ -307,23 +307,23 @@ class RV32I(InstructionSet):
"Debug instruction encountered at 0x{:08X}".format(self.pc - 1) "Debug instruction encountered at 0x{:08X}".format(self.pc - 1)
) )
def instruction_nop(self, ins: 'LoadedInstruction'): def instruction_nop(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 0) ASSERT_LEN(ins.args, 0)
pass pass
def instruction_li(self, ins: 'LoadedInstruction'): def instruction_li(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2) ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0) reg = ins.get_reg(0)
immediate = ins.get_imm(1) immediate = ins.get_imm(1)
self.regs.set(reg, immediate) self.regs.set(reg, immediate)
def instruction_la(self, ins: 'LoadedInstruction'): def instruction_la(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2) ASSERT_LEN(ins.args, 2)
reg = ins.get_reg(0) reg = ins.get_reg(0)
immediate = ins.get_imm(1) immediate = ins.get_imm(1)
self.regs.set(reg, immediate) self.regs.set(reg, immediate)
def instruction_mv(self, ins: 'LoadedInstruction'): def instruction_mv(self, ins: 'Instruction'):
ASSERT_LEN(ins.args, 2) ASSERT_LEN(ins.args, 2)
rd, rs = ins.get_reg(0), ins.get_reg(1) rd, rs = ins.get_reg(0), ins.get_reg(1)
self.regs.set(rd, self.regs.get(rs)) self.regs.set(rd, self.regs.get(rs))

@ -12,48 +12,48 @@ class RV32M(InstructionSet):
""" """
The RV32M Instruction set, containing multiplication and division instructions The RV32M Instruction set, containing multiplication and division instructions
""" """
def instruction_mul(self, ins: 'LoadedInstruction'): def instruction_mul(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins) rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set( self.regs.set(
rd, rd,
rs1 * rs2 rs1 * rs2
) )
def instruction_mulh(self, ins: 'LoadedInstruction'): def instruction_mulh(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins) rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set( self.regs.set(
rd, rd,
(rs1 * rs2) >> 32 (rs1 * rs2) >> 32
) )
def instruction_mulhsu(self, ins: 'LoadedInstruction'): def instruction_mulhsu(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins) INS_NOT_IMPLEMENTED(ins)
def instruction_mulhu(self, ins: 'LoadedInstruction'): def instruction_mulhu(self, ins: 'Instruction'):
INS_NOT_IMPLEMENTED(ins) INS_NOT_IMPLEMENTED(ins)
def instruction_div(self, ins: 'LoadedInstruction'): def instruction_div(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins) rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set( self.regs.set(
rd, rd,
rs1 // rs2 rs1 // rs2
) )
def instruction_divu(self, ins: 'LoadedInstruction'): def instruction_divu(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False) rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
self.regs.set( self.regs.set(
rd, rd,
rs1 // rs2 rs1 // rs2
) )
def instruction_rem(self, ins: 'LoadedInstruction'): def instruction_rem(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins) rd, rs1, rs2 = self.parse_rd_rs_rs(ins)
self.regs.set( self.regs.set(
rd, rd,
rs1 % rs2 rs1 % rs2
) )
def instruction_remu(self, ins: 'LoadedInstruction'): def instruction_remu(self, ins: 'Instruction'):
rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False) rd, rs1, rs2 = self.parse_rd_rs_rs(ins, signed=False)
self.regs.set( self.regs.set(
rd, rd,

@ -3,15 +3,16 @@ RiscEmu (c) 2021 Anton Lydike
SPDX-License-Identifier: MIT SPDX-License-Identifier: MIT
""" """
import os
import re import re
from typing import Dict, Tuple, Iterable, Callable from typing import Dict, Tuple, Iterable, Callable
from helpers import Peekable from .helpers import Peekable
from .assembler import MemorySectionType, ParseContext, AssemblerDirectives from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
from .base_types import Program from .base_types import Program
from .colors import FMT_PARSE from .colors import FMT_PARSE
from .exceptions import ParseException from .exceptions import ParseException
from .tokenizer import Token, TokenType from .tokenizer import Token, TokenType, tokenize
from .types import SimpleInstruction from .types import SimpleInstruction
@ -41,17 +42,32 @@ PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {
def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program: def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
"""
Convert a token stream into a parsed program
:param name: the program's name
:param tokens_iter: the program's content, tokenized
:return: a parsed program
"""
context = ParseContext(name) context = ParseContext(name)
for token, args in composite_tokenizer(Peekable[Token](tokens_iter)): for token, args in composite_tokenizer(Peekable[Token](tokens_iter)):
if token.type not in PARSERS: if token.type not in PARSERS:
raise ParseException("Unexpected token type: {}, {}".format(token, args)) raise ParseException("Unexpected token type: {}, {}".format(token, args))
print("{} {}".format(token, args))
PARSERS[token.type](token, args, context) PARSERS[token.type](token, args, context)
return context.finalize() return context.finalize()
def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]: def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]:
"""
Convert an iterator over tokens into an iterator over tuples: (token, list(token))
The first token is either a pseudo_op, label, or instruction name. The token list holds all remaining tokens before
a newline is encountered
:param tokens_iter: An iterator over tokens
:return: An iterator over a slightly more structured representation of the tokens
"""
tokens: Peekable[Token] = Peekable[Token](tokens_iter) tokens: Peekable[Token] = Peekable[Token](tokens_iter)
while not tokens.is_empty(): while not tokens.is_empty():
@ -75,5 +91,10 @@ def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
elif tokens.peek().type == TokenType.NEWLINE: elif tokens.peek().type == TokenType.NEWLINE:
next(tokens) next(tokens)
break break
raise ParseException("Expected newline, instead got {}".format(tokens.peek())) break
#raise ParseException("Expected newline, instead got {}".format(tokens.peek()))
def parse_program_from_file(path: str) -> Program:
    """
    Open the file at the given path, tokenize its contents and parse the resulting tokens
    :param path: path of the file to read
    :return: the parsed program, named after the last component of the path
    """
    with open(path, 'r') as source:
        # the program is named after the file itself, without its directory part
        program_name = os.path.basename(path)
        return parse_tokens(program_name, tokenize(source))

@ -7,7 +7,7 @@ SPDX-License-Identifier: MIT
import re import re
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum, auto from enum import Enum, auto
from typing import List, Iterable from typing import List, Iterable, Optional
from riscemu.decoder import RISCV_REGS from riscemu.decoder import RISCV_REGS
from .exceptions import ParseException from .exceptions import ParseException
@ -17,8 +17,6 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$') MEMORY_ADDRESS_PATTERN = re.compile(r'^(0[xX][A-f0-9]+|\d+|0b[0-1]+)\(([A-z]+[0-9]{0,2})\)$')
REGISTER_NAMES = RISCV_REGS REGISTER_NAMES = RISCV_REGS
I = lambda x: x
class TokenType(Enum): class TokenType(Enum):
COMMA = auto() COMMA = auto()
@ -39,7 +37,7 @@ class Token:
return '\\n' return '\\n'
if self.type == TokenType.COMMA: if self.type == TokenType.COMMA:
return ', ' return ', '
return '{}({}) '.format(self.type.name[0:3], self.value) return '{}({})'.format(self.type.name[0:3], self.value)
NEWLINE = Token(TokenType.NEWLINE, '\n') NEWLINE = Token(TokenType.NEWLINE, '\n')
@ -55,7 +53,7 @@ def tokenize(input: Iterable[str]) -> Iterable[Token]:
if not line: if not line:
continue continue
parts = list(part for part in re.split(WHITESPACE_PATTERN, line) if part) parts = list(part for part in split_whitespace_respecting_quotes(line) if part)
yield from parse_line(parts) yield from parse_line(parts)
yield NEWLINE yield NEWLINE
@ -70,6 +68,8 @@ def parse_line(parts: List[str]) -> Iterable[Token]:
yield Token(TokenType.PSEUDO_OP, first_token) yield Token(TokenType.PSEUDO_OP, first_token)
elif first_token[-1] == ':': elif first_token[-1] == ':':
yield Token(TokenType.LABEL, first_token) yield Token(TokenType.LABEL, first_token)
yield from parse_line(parts[1:])
return
else: else:
yield Token(TokenType.INSTRUCTION_NAME, first_token) yield Token(TokenType.INSTRUCTION_NAME, first_token)
@ -100,3 +100,40 @@ def print_tokens(tokens: Iterable[Token]):
for token in tokens: for token in tokens:
print(token, end='\n' if token == NEWLINE else '') print(token, end='\n' if token == NEWLINE else '')
print("", flush=True, end="") print("", flush=True, end="")
def split_whitespace_respecting_quotes(line: str) -> Iterable[str]:
    """
    Split a line into parts at whitespace, keeping quoted sections together

    Text enclosed in single or double quotes is yielded as one part with the quotes removed, whitespace inside the
    quotes is preserved. A quote character also ends the unquoted part directly in front of it, so
    'hello"a b"abc' is split into 'hello', 'a b' and 'abc'.

    Any character for which str.isspace() is true separates parts outside of quotes (this includes '\\r', '\\f'
    and '\\v', not only space, tab and newline).

    A closed quoted section is always yielded, even when it is empty. Backslash escapes are not interpreted, a
    quoted section ends at the next occurrence of its opening quote character. If a quote is never closed, the
    remaining text is yielded as the last part (if it is not empty).

    :param line: the line to split
    :return: an iterator over the parts of the line
    """
    quote = ""  # the quote character that opened the current section, empty while outside of quotes
    chars = []  # characters collected for the part currently being built

    for c in line:
        if quote:
            if c == quote:
                # closing quote: emit the quoted text, even if it is empty
                yield "".join(chars)
                chars.clear()
                quote = ""
            else:
                chars.append(c)
            continue

        if c in "\"'":
            # opening quote: text directly in front of it becomes its own part
            if chars:
                yield "".join(chars)
                chars.clear()
            quote = c
            continue

        if c.isspace():
            if chars:
                yield "".join(chars)
                chars.clear()
            continue

        chars.append(c)

    # trailing unquoted text, or the remainder of an unterminated quoted section
    if chars:
        yield "".join(chars)

@ -2,7 +2,7 @@ from typing import List, Tuple
from .exceptions import MemoryAccessException from .exceptions import MemoryAccessException
from .helpers import parse_numeric_argument from .helpers import parse_numeric_argument
from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \ from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \
T_AbsoluteAddress T_AbsoluteAddress, Program
class SimpleInstruction(Instruction): class SimpleInstruction(Instruction):
@ -26,13 +26,14 @@ class SimpleInstruction(Instruction):
class InstructionMemorySection(MemorySection): class InstructionMemorySection(MemorySection):
def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, base: int = 0): def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, owner: Program, base: int = 0):
self.name = name self.name = name
self.base = base self.base = base
self.context = context self.context = context
self.size = len(instructions) * 4 self.size = len(instructions) * 4
self.flags = MemoryFlags(True, True) self.flags = MemoryFlags(True, True)
self.instructions = instructions self.instructions = instructions
self.owner = owner.name
def read(self, offset: T_RelativeAddress, size: int) -> bytearray: def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read') raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')
@ -47,13 +48,14 @@ class InstructionMemorySection(MemorySection):
class BinaryDataMemorySection(MemorySection): class BinaryDataMemorySection(MemorySection):
def __init__(self, data: bytearray, name: str, context: InstructionContext, base: int = 0): def __init__(self, data: bytearray, name: str, context: InstructionContext, owner: Program, base: int = 0):
self.name = name self.name = name
self.base = base self.base = base
self.context = context self.context = context
self.size = len(data) self.size = len(data)
self.flags = MemoryFlags(False, False) self.flags = MemoryFlags(False, False)
self.data = data self.data = data
self.owner = owner.name
def read(self, offset: T_RelativeAddress, size: int) -> bytearray: def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
if offset + size > self.size: if offset + size > self.size:

@ -0,0 +1,2 @@
from .test_tokenizer import *
from .test_helpers import *

@ -3,7 +3,7 @@ from unittest import TestCase
from riscemu.helpers import * from riscemu.helpers import *
class Test(TestCase): class TestHelpers(TestCase):
def test_int_to_bytes(self): def test_int_to_bytes(self):
self.assertEqual(int_to_bytes(-1), bytearray([0xff] * 4), "-1") self.assertEqual(int_to_bytes(-1), bytearray([0xff] * 4), "-1")
self.assertEqual(int_to_bytes(1), bytearray([0, 0, 0, 1]), "1") self.assertEqual(int_to_bytes(1), bytearray([0, 0, 0, 1]), "1")

@ -1,6 +1,7 @@
from unittest import TestCase from unittest import TestCase
from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA, \
split_whitespace_respecting_quotes
def ins(name: str) -> Token: def ins(name: str) -> Token:
@ -19,7 +20,7 @@ def lbl(name: str) -> Token:
return Token(TokenType.LABEL, name) return Token(TokenType.LABEL, name)
class Test(TestCase): class TestTokenizer(TestCase):
def test_instructions(self): def test_instructions(self):
program = [ program = [
@ -79,3 +80,47 @@ section:
self.assertEqual(list(tokenize(program.splitlines())), tokens) self.assertEqual(list(tokenize(program.splitlines())), tokens)
def test_split_whitespace_respecting_quotes_single(self):
    """A single unquoted word is returned as the only part"""
    parts = list(split_whitespace_respecting_quotes("test"))
    self.assertEqual(parts, ["test"])
def test_split_whitespace_respecting_quotes_empty(self):
    """An empty line produces no parts"""
    parts = list(split_whitespace_respecting_quotes(""))
    self.assertEqual(parts, [])
def test_split_whitespace_respecting_quotes_two_parts(self):
    """Two words separated by a space are returned as two parts"""
    parts = list(split_whitespace_respecting_quotes("test 123"))
    self.assertEqual(parts, ["test", "123"])
def test_split_whitespace_respecting_quotes_whole_quoted(self):
    """A line that is single-quoted as a whole is returned as one part without the quotes"""
    parts = list(split_whitespace_respecting_quotes("'test 123'"))
    self.assertEqual(parts, ["test 123"])
def test_split_whitespace_respecting_quotes_double_quotes(self):
    """A line that is double-quoted as a whole is returned as one part without the quotes"""
    parts = list(split_whitespace_respecting_quotes('"test 123"'))
    self.assertEqual(parts, ["test 123"])
def test_split_whitespace_respecting_quotes_quoted_then_normal(self):
    """A quoted section followed by an unquoted word gives two parts"""
    parts = list(split_whitespace_respecting_quotes('"test 123" abc'))
    self.assertEqual(parts, ["test 123", "abc"])
def test_split_whitespace_respecting_quotes_quoted_sorrounded(self):
    """A quoted section between two unquoted words gives three parts"""
    parts = list(split_whitespace_respecting_quotes('hello "test 123" abc'))
    self.assertEqual(parts, ["hello", "test 123", "abc"])
def test_split_whitespace_respecting_quotes_weird_spaces(self):
    """A tab directly after a quoted section separates parts just like a space"""
    parts = list(split_whitespace_respecting_quotes('hello "test 123"\tabc'))
    self.assertEqual(parts, ["hello", "test 123", "abc"])
def test_split_whitespace_respecting_quotes_quotes_no_spaces(self):
    """Quotes split the surrounding text into parts even without any whitespace around them"""
    parts = list(split_whitespace_respecting_quotes('hello"test 123"abc'))
    self.assertEqual(parts, ["hello", "test 123", "abc"])

Loading…
Cancel
Save