[wip] almost done with the rework of the parser and internal data structure representation of programs
parent
84562de98f
commit
dc4dca6fea
@ -1,319 +0,0 @@
|
||||
"""
|
||||
RiscEmu (c) 2021 Anton Lydike
|
||||
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
This file holds Executable and LoadedExecutable classes as well as loading and some linking code.
|
||||
|
||||
FIXME: refactor this code into multiple files
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Tuple, Union, Optional
|
||||
from .Exceptions import *
|
||||
from .helpers import *
|
||||
from math import log
|
||||
|
||||
import typing
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from .Tokenizer import RiscVInstructionToken
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MemoryFlags:
    """Immutable pair of access flags attached to a memory section."""
    read_only: bool   # rendered as 'ro' when set, 'rw' otherwise
    executable: bool  # rendered as 'x' when set, '-' otherwise

    def __repr__(self):
        """Render the flags compactly, e.g. ``MemoryFlags(ro,x)``."""
        access = 'ro' if self.read_only else 'rw'
        exec_mark = 'x' if self.executable else '-'
        return "{}({},{})".format(type(self).__name__, access, exec_mark)
|
||||
|
||||
|
||||
@dataclass
class MemorySection:
    """
    A section of an executable while it is being assembled.

    Data is collected as a list of chunks; ``size`` is the total number of bytes added via ``add``.
    """
    name: str
    flags: MemoryFlags
    size: int = 0
    content: List[bytearray] = field(default_factory=list)

    def add(self, data: bytearray):
        """
        Append a chunk of bytes to this section and grow ``size`` by its length

        :param data: the bytes to append
        """
        self.content.append(data)
        self.size += len(data)

    def continuous_content(self, parent: 'LoadedExecutable'):
        """
        converts the content into one continuous bytearray

        :param parent: the executable the section is loaded into (not used by this implementation)
        :return: a newly allocated bytearray holding all chunks back to back
        """
        if self.size == 0:
            return bytearray(0)
        # Join into a fresh buffer. Extending self.content[0] in place (as `+=` on a bytearray does)
        # would corrupt the first stored chunk and make repeated calls return ever-growing data.
        return bytearray().join(self.content)
|
||||
|
||||
|
||||
@dataclass
class InstructionMemorySection(MemorySection):
    """Section variant that collects instruction tokens instead of raw bytes."""
    content: List['RiscVInstructionToken'] = field(default_factory=list)

    def add_insn(self, insn: 'RiscVInstructionToken'):
        """
        Append one instruction token; each instruction increases ``size`` by one

        :param insn: the instruction token to store
        """
        self.size += 1
        self.content.append(insn)

    def continuous_content(self, parent: 'LoadedExecutable'):
        """
        Wrap every stored token in a LoadedInstruction bound to ``parent``

        :param parent: the loaded executable the instructions belong to
        :return: a list of LoadedInstruction objects, in insertion order
        """
        loaded = []
        for token in self.content:
            loaded.append(LoadedInstruction(token.instruction, token.args, parent))
        return loaded
|
||||
|
||||
|
||||
@dataclass()
class Executable:
    """
    The result of parsing a program, before it is loaded at a concrete address.

    Positions (``run_ptr`` and the values of ``symbols``) are (section name, offset) pairs.
    """
    run_ptr: Tuple[str, int]
    sections: Dict[str, MemorySection]
    symbols: Dict[str, Tuple[str, int]]
    exported_symbols: List[str]
    name: str

    def __repr__(self):
        """Summarise section names, symbol names, entry point and exported symbols."""
        section_names = " ".join(self.sections)
        symbol_names = " ".join(self.symbols)
        exported = ",".join(self.exported_symbols)
        return "{}(sections = {}, symbols = {}, run_ptr = {}, globals={})".format(
            type(self).__name__, section_names, symbol_names, self.run_ptr, exported
        )
|
||||
|
||||
|
||||
### LOADING CODE
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LoadedInstruction:
    """
    An instruction which is loaded into memory. It keeps a reference to the binary it belongs to
    so that symbol names used as arguments can be resolved.
    """
    name: str
    args: List[str]
    bin: 'LoadedExecutable'

    def _arg(self, num: int) -> str:
        """Return the raw argument at position ``num``, raising a ParseException if it is missing."""
        if num >= len(self.args):
            raise ParseException("Instruction {} expected argument at {} (args: {})".format(self.name, num, self.args))
        return self.args[num]

    def get_imm(self, num: int):
        """
        parse and get immediate argument

        :param num: index of the argument
        :return: the symbol's value if the argument names a known symbol, the parsed number otherwise
        """
        raw = self._arg(num)
        if not self.bin.has_symb(raw):
            return parse_numeric_argument(raw)
        return self.bin.lookup_symbol(raw)

    def get_imm_reg(self, num: int):
        """
        parse and get an argument imm(reg)

        :param num: index of the argument
        :return: a tuple (immediate value, register name)
        """
        raw = self._arg(num)
        ASSERT_IN("(", raw)
        # drop the closing parenthesis, then split "imm(reg" at the opening one
        imm, reg = raw[:-1].split("(")
        value = self.bin.lookup_symbol(imm) if self.bin.has_symb(imm) else parse_numeric_argument(imm)
        return value, reg

    def get_reg(self, num: int):
        """
        parse and get an register argument

        :param num: index of the argument
        :return: the argument string, unchanged
        """
        return self._arg(num)

    def __repr__(self):
        """Render as ``name arg0, arg1, ...``."""
        joined_args = ", ".join(self.args)
        return "{} {}".format(self.name, joined_args)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LoadedMemorySection:
    """
    A section which is loaded into memory

    ``content`` is either a list of instructions (executable sections) or a bytearray; offsets
    passed to the accessors are relative to ``base``.
    """
    name: str
    base: int
    size: int
    content: Union[List[LoadedInstruction], bytearray] = field(repr=False)
    flags: MemoryFlags
    owner: str

    def read(self, offset: int, size: int):
        """
        Read ``size`` items starting at ``offset``

        :raise MemoryAccessException: if the range is not fully inside the section
        """
        if offset < 0:
            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, size, 'read')
        if offset + size > self.size:
            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
                                        size, 'read')
        return self.content[offset: offset + size]

    def read_instruction(self, offset):
        """
        Return the instruction stored at ``offset``

        :raise MemoryAccessException: if the section is not executable or the offset is out of range
        """
        if not self.flags.executable:
            raise MemoryAccessException('Section not executable!', self.base + offset, 1, 'read exec')

        if offset < 0:
            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, 1, 'read exec')
        if offset >= self.size:
            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
                                        1, 'read exec')
        return self.content[offset]

    def write(self, offset, size, data):
        """
        Write the first ``size`` items of ``data`` to the section, starting at ``offset``

        :raise MemoryAccessException: if the section is read-only or the range is not fully inside the section
        """
        if self.flags.read_only:
            raise MemoryAccessException('Section not writeable {}'.format(self.name), self.base + offset, size, 'write')

        if offset < 0:
            raise MemoryAccessException('Invalid offset {}'.format(offset), self.base + offset, size, 'write')
        # Check the END of the written range (same rule as read()). Checking only the start offset let a
        # write begin inside the section and run past its end, leaving a partial write behind.
        if offset + size > self.size:
            raise MemoryAccessException('Outside section boundary of section {}'.format(self.name), self.base + offset,
                                        size, 'write')

        for i in range(size):
            self.content[offset + i] = data[i]

    def dump(self, at_addr=None, fmt='hex', max_rows=10, group=4, bytes_per_row=16, all=False):
        """
        Print a view of the section to stdout

        :param at_addr: absolute address to centre the view on and highlight; defaults to the section base
                        (without highlight)
        :param fmt: format passed on to format_bytes
        :param max_rows: maximum number of rows (or instructions) to print
        :param group: number of bytes per group, passed on to format_bytes
        :param bytes_per_row: number of bytes printed per row for data sections
        :param all: print the whole section instead of a window
        """
        highlight = -1  # -1 means "nothing highlighted"
        if at_addr is None:
            at_addr = self.base
        else:
            highlight = at_addr - self.base

        at_off = at_addr - self.base
        start = max(align_addr(at_off - ((max_rows * bytes_per_row) // 2), 8) - 8, 0)
        if all:
            end = self.size
            start = 0
        else:
            end = min(start + (max_rows * bytes_per_row), self.size)

        # width of the address column is derived from the largest address shown
        fmt_str = " 0x{:0" + str(ceil(log(self.base + end, 16))) + "X}: {}"

        if self.flags.executable:
            # this section holds instructions!
            start = 0 if all else max(at_off - (max_rows // 2), 0)
            end = self.size if all else min(self.size, start + max_rows)
            print(FMT_MEM + "{}, viewing {} instructions:".format(
                self, end - start
            ) + FMT_NONE)
            for i in range(start, end):
                if i == highlight:
                    ins = FMT_UNDERLINE + FMT_ORANGE + repr(self.content[i]) + FMT_NONE
                else:
                    ins = repr(self.content[i])
                print(fmt_str.format(self.base + i, ins))
        else:
            print(FMT_MEM + "{}, viewing {} bytes:".format(
                self, end - start
            ) + FMT_NONE)
            for i in range(0, end - start, bytes_per_row):
                row_start = start + i
                data = self.content[row_start: min(row_start + bytes_per_row, end)]
                # half-open row range: an address equal to the start of the NEXT row belongs to that row
                if row_start <= highlight < row_start + bytes_per_row:
                    hi_ind = (highlight - row_start) // group
                    print(fmt_str.format(self.base + row_start, format_bytes(data, fmt, group, highlight=hi_ind)))
                else:
                    print(fmt_str.format(self.base + row_start, format_bytes(data, fmt, group)))
        if end == self.size:
            print(FMT_MEM + "End of section!" + FMT_NONE)
        else:
            print(FMT_MEM + "More bytes ..." + FMT_NONE)

    def __repr__(self):
        """Describe the section by name, base address, size, flags and owner."""
        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
            self.__class__.__name__,
            self.name,
            self.base,
            self.size,
            self.flags,
            self.owner
        )
|
||||
|
||||
|
||||
class LoadedExecutable:
    """
    This represents an executable which is loaded into memory at address base_addr

    This is basically the "loader" in normal system environments: it places every section of the
    executable at a concrete address and translates section-relative symbols to absolute addresses.

    It still holds a symbol table, that is not accessible memory since I don't want to deal with
    binary strings in memory etc.
    """
    name: str
    base_addr: int
    sections_by_name: Dict[str, LoadedMemorySection]
    sections: List[LoadedMemorySection]
    symbols: Dict[str, int]
    run_ptr: int
    exported_symbols: Dict[str, int]
    global_symbol_table: Dict[str, int]

    def __init__(self, exe: Executable, base_addr: int, global_symbol_table: Dict[str, int]):
        """
        Load ``exe`` at ``base_addr``

        :param exe: the parsed executable
        :param base_addr: address of the first section
        :param global_symbol_table: table consulted for symbols that this executable does not define
        """
        self.name = exe.name
        self.base_addr = base_addr
        self.sections = []
        self.sections_by_name = {}
        self.symbols = {}
        self.exported_symbols = {}
        self.global_symbol_table = global_symbol_table

        # lay the sections out one after another, aligning the start of each following section
        next_base = base_addr
        for section in exe.sections.values():
            loaded = LoadedMemorySection(
                section.name, next_base, section.size, section.continuous_content(self), section.flags, self.name
            )
            self.sections.append(loaded)
            self.sections_by_name[loaded.name] = loaded
            next_base = align_addr(loaded.size + next_base)

        # '_static_' symbols carry an absolute value, all others are relative to their section
        for symbol, (section_name, offset) in exe.symbols.items():
            if section_name == '_static_':
                self.symbols[symbol] = offset
                continue
            ASSERT_IN(section_name, self.sections_by_name)
            self.symbols[symbol] = self.sections_by_name[section_name].base + offset

        for symbol in exe.exported_symbols:
            self.exported_symbols[symbol] = self.symbols[symbol]

        self.size = next_base - base_addr

        # translate run_ptr from executable
        entry_section, entry_offset = exe.run_ptr
        self.run_ptr = self.sections_by_name[entry_section].base + entry_offset

    def lookup_symbol(self, name):
        """
        Resolve a symbol, preferring local symbols over the global symbol table

        :raise LinkerException: if the symbol is found in neither table
        """
        for table in (self.symbols, self.global_symbol_table):
            if name in table:
                return table[name]
        raise LinkerException('Symbol {} not found!'.format(name), (self,))

    def __repr__(self):
        """Describe the loaded executable by name, base, size, section names and entry point."""
        section_names = " ".join(self.sections_by_name)
        return '{}[{}](base=0x{:08X}, size={}bytes, sections={}, run_ptr=0x{:08X})'.format(
            type(self).__name__, self.name, self.base_addr, self.size, section_names, self.run_ptr
        )

    def has_symb(self, arg):
        """Return whether ``arg`` is a symbol known locally or in the global symbol table."""
        if arg in self.symbols:
            return True
        return arg in self.global_symbol_table
|
@ -1,193 +0,0 @@
|
||||
"""
|
||||
RiscEmu (c) 2021 Anton Lydike
|
||||
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
This file holds the parser that parses the tokenizer output.
|
||||
"""
|
||||
|
||||
from .helpers import parse_numeric_argument, int_to_bytes
|
||||
from .Executable import Executable, InstructionMemorySection, MemorySection, MemoryFlags
|
||||
from .Exceptions import *
|
||||
|
||||
from .Tokenizer import tokenize, TokenType, Token, COMMA, NEWLINE
|
||||
|
||||
from typing import Dict, Tuple, List, Optional
|
||||
|
||||
|
||||
class ExecutableParser:
    """
    Parses output from the RiscVTokenizer

    Tokens are consumed one by one; instructions, symbols and pseudo-ops are collected into
    sections and a symbol table, from which ``parse`` builds an Executable.
    """
    tokenizer: 'RiscVTokenizer'

    def __init__(self, tokenizer: 'RiscVTokenizer'):
        """
        :param tokenizer: the tokenizer whose ``tokens`` (and ``name``) are read by ``parse``
        """
        self.instructions: List['RiscVInstructionToken'] = list()
        self.symbols: Dict[str, Tuple[str, int]] = dict()
        self.sections: Dict[str, MemorySection] = dict()
        self.tokenizer = tokenizer
        self.active_section: Optional[str] = None
        self.implicit_sections = False
        self.globals: List[str] = list()

    def parse(self) -> Executable:
        """
        parse tokenizer output into an executable
        :return: the parsed executable
        :raise ParseException: Raises a ParseException when invalid input is read
        """
        # Dispatch on the class name of the token. isinstance() only accepts types (or tuples of
        # types) as its second argument, so calling it with these names as strings raises TypeError.
        # The token classes are not imported by this module, hence the lookup by name; walking the
        # MRO keeps isinstance-like behaviour for subclasses.
        handlers = {
            'RiscVInstructionToken': self.parse_instruction,
            'RiscVSymbolToken': self.handle_symbol,
            'RiscVPseudoOpToken': self.handle_pseudo_op,
        }
        for token in self.tokenizer.tokens:
            for klass in type(token).__mro__:
                handler = handlers.get(klass.__name__)
                if handler is not None:
                    handler(token)
                    break
        return self._get_execuable()

    def _get_execuable(self) -> Executable:
        """
        Build the Executable from the collected state

        The entry point is '_start' if defined, else 'main', else the beginning of the text section.
        """
        start_ptr = ('text', 0)
        if '_start' in self.symbols:
            start_ptr = self.symbols['_start']
        elif 'main' in self.symbols:
            start_ptr = self.symbols['main']
        return Executable(start_ptr, self.sections, self.symbols, self.globals, self.tokenizer.name)

    def parse_instruction(self, ins: 'RiscVInstructionToken') -> None:
        """
        parses an Instruction token
        :param ins: the instruction token
        """
        if self.active_section is None:
            # no section directive seen yet: implicitly open the text section
            self.op_text()
            self.implicit_sections = True

        # NOTE(review): the ASSERT_* helpers used in this class are not among the names this module
        # imports explicitly; presumably they arrive through a star import - confirm.
        ASSERT_EQ(self.active_section, 'text')
        sec = self._curr_sec()
        if isinstance(sec, InstructionMemorySection):
            sec.add_insn(ins)
        else:
            raise ParseException("SHOULD NOT BE REACHED")

    def handle_symbol(self, token: 'RiscVSymbolToken'):
        """
        Handle a symbol token (such as 'main:')
        :param token: the symbol token
        """
        ASSERT_NOT_IN(token.name, self.symbols)
        ASSERT_NOT_NULL(self.active_section)
        # the symbol points at the current end of the active section
        sec_pos = self._curr_sec().size
        self.symbols[token.name] = (self.active_section, sec_pos)

    def handle_pseudo_op(self, op: 'RiscVPseudoOpToken'):
        """
        Handle a pseudo op token (such as '.word 0xffaabbcc') by dispatching to the matching op_<name> method
        :param op: the pseudo-op token
        :raise ParseException: if no handler exists for the pseudo op
        """
        name = 'op_' + op.name
        if hasattr(self, name):
            getattr(self, name)(op)
        else:
            raise ParseException("Unknown pseudo op: {}".format(op), (op,))

    ## Pseudo op implementations:
    def op_section(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .section token
        :param op: The token
        """
        ASSERT_LEN(op.args, 1)
        # drop the first character of the argument (e.g. '.data' -> 'data')
        name = op.args[0][1:]
        ASSERT_IN(name, ('data', 'rodata', 'text'))
        getattr(self, 'op_' + name)(op)

    def op_text(self, op: 'RiscVPseudoOpToken' = None):
        """
        handles a .text token
        :param op: The token
        """
        self._set_sec('text', MemoryFlags(read_only=True, executable=True), cls=InstructionMemorySection)

    def op_data(self, op: 'RiscVPseudoOpToken' = None):
        """
        handles a .data token
        :param op: The token
        """
        self._set_sec('data', MemoryFlags(read_only=False, executable=False))

    def op_rodata(self, op: 'RiscVPseudoOpToken' = None):
        """
        handles a .rodata token
        :param op: The token
        """
        self._set_sec('rodata', MemoryFlags(read_only=True, executable=False))

    def op_space(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .space token. Inserts empty space into the current (data or rodata) section
        :param op: The token
        """
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        size = parse_numeric_argument(op.args[0])
        self._curr_sec().add(bytearray(size))

    def op_ascii(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .ascii token. Inserts ascii encoded text into the current data section
        :param op: The token
        """
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        # strip the surrounding quote characters, then resolve backslash escapes
        text = op.args[0][1:-1].encode('ascii').decode('unicode_escape')
        self._curr_sec().add(bytearray(text, 'ascii'))

    def op_asciiz(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .asciiz token. Inserts null-terminated ascii encoded text into the current data section
        :param op: The token
        """
        ASSERT_IN(self.active_section, ('data', 'rodata'))
        ASSERT_LEN(op.args, 1)
        # strip the surrounding quote characters, then resolve backslash escapes
        text = op.args[0][1:-1].encode('ascii').decode('unicode_escape')
        self._curr_sec().add(bytearray(text + '\0', 'ascii'))

    def op_global(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .global token. Marks the token as global
        :param op: The token
        """
        ASSERT_LEN(op.args, 1)
        name = op.args[0]
        self.globals.append(name)

    def op_set(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .set name, val token. Sets the symbol name to val
        :param op: The token
        """
        ASSERT_LEN(op.args, 2)
        name = op.args[0]
        val = parse_numeric_argument(op.args[1])
        # '_static_' marks the value as absolute rather than relative to a section
        self.symbols[name] = ('_static_', val)

    def op_align(self, op: 'RiscVPseudoOpToken'):
        """
        handles an align token. Currently a nop (just not implemented fully yet, as linker handles most alignment tasks)
        :param op: The token
        """
        pass

    def op_word(self, op: 'RiscVPseudoOpToken'):
        """
        handles a .word token. Appends the value as 4 bytes to the current section
        :param op: The token
        """
        ASSERT_LEN(op.args, 1)
        val = parse_numeric_argument(op.args[0])
        self._curr_sec().add(int_to_bytes(val, 4))

    ## Section handler code
    def _set_sec(self, name: str, flags: MemoryFlags, cls=MemorySection):
        """
        Make ``name`` the active section, creating it with ``flags`` on first use

        :param name: section name
        :param flags: flags for a newly created section (ignored if the section already exists)
        :param cls: section class to instantiate for a new section
        """
        if name not in self.sections:
            self.sections[name] = cls(name, flags)
        self.active_section = name

    def _curr_sec(self):
        """Return the section object that is currently active."""
        return self.sections[self.active_section]
|
@ -0,0 +1,169 @@
|
||||
from typing import Optional, Tuple, Union
|
||||
from enum import Enum, auto
|
||||
from typing import Optional, Tuple, Union
|
||||
|
||||
from helpers import parse_numeric_argument
|
||||
from .base_types import Program, T_RelativeAddress, InstructionContext
|
||||
from .colors import FMT_PARSE, FMT_NONE
|
||||
from .exceptions import ParseException
|
||||
from .helpers import ASSERT_LEN
|
||||
from .tokenizer import Token
|
||||
from .types import BinaryDataMemorySection, InstructionMemorySection
|
||||
|
||||
|
||||
# Base names of the sections that are treated as instruction sections (all others hold data)
INSTRUCTION_SECTION_NAMES = ('.text', '.init', '.fini')
|
||||
|
||||
|
||||
class MemorySectionType(Enum):
    """Kind of content a section under construction holds."""
    # raw bytes
    Data = auto()
    # a list of instructions
    Instructions = auto()
|
||||
|
||||
|
||||
class CurrentSection:
    """The section the parser is currently filling: a name, its data so far and its type."""
    name: str
    data: Union[list, bytearray]
    type: MemorySectionType

    def current_address(self) -> T_RelativeAddress:
        """
        Return the section-relative address of the next item to be added

        Data sections are addressed per byte, every entry of an instruction section counts as 4 bytes.
        """
        entries = len(self.data)
        if self.type == MemorySectionType.Data:
            return entries
        return 4 * entries

    def __repr__(self):
        """Render name, data and the name of the section type."""
        parts = (type(self).__name__, self.name, self.data, self.type.name)
        return "{}(name={},data={},type={})".format(*parts)
|
||||
|
||||
|
||||
class ParseContext:
    """
    Mutable state of one parser run: the program being built, its instruction context and the
    section that is currently being filled (``None`` while no section is open).
    """
    section: Optional[CurrentSection]
    context: InstructionContext
    program: Program

    def __init__(self, name: str):
        """
        :param name: name given to the Program that is created
        """
        program = Program(name)
        self.program = program
        self.context = program.context
        self.section = None

    def finalize(self) -> Program:
        """Finalize the current section (if any) and return the program."""
        self.finalize_section()
        return self.program

    def finalize_section(self):
        """
        Convert the current section into a memory section and add it to the program

        Does nothing when there is no current section.
        """
        current = self.section
        if current is None:
            return
        if current.type == MemorySectionType.Data:
            section_cls = BinaryDataMemorySection
        elif current.type == MemorySectionType.Instructions:
            section_cls = InstructionMemorySection
        else:
            # unknown section types are silently ignored
            return
        self.program.add_section(section_cls(current.data, current.name, self.context))

    def __repr__(self):
        """Render the current section and the program, one per line."""
        template = "{}(\n\tsetion={},\n\tprogram={}\n)"
        return template.format(type(self).__name__, self.section, self.program)
|
||||
|
||||
|
||||
def ASSERT_IN_SECTION_TYPE(context: ParseContext, type: MemorySectionType):
    """
    Ensure that the parser is currently inside a section of the given type

    :param context: the parse context whose current section is checked
    :param type: the required section type
    :raise ParseException: if there is no current section or it has a different type
    """
    section = context.section
    if section is None:
        raise ParseException('Error, expected to be in {} section, but no section is present...'.format(type.name))
    if section.type != type:
        message = 'Error, expected to be in {} section, but currently in {}...'.format(type.name, section)
        raise ParseException(message)
|
||||
|
||||
|
||||
def get_section_base_name(section_name: str) -> str:
    """
    Return the first component of a section name, with a leading dot

    '.text.startup' and '.text' both yield '.text'. Names given without the leading dot
    (e.g. 'text') are accepted as well instead of failing with an IndexError.

    :param section_name: the full section name
    :return: the base name, always starting with '.'
    """
    stripped = section_name[1:] if section_name.startswith('.') else section_name
    return '.' + stripped.split('.')[0]
|
||||
|
||||
|
||||
class AssemblerDirectives:
    """
    This class represents a collection of all assembler directives as documented by
    https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md#pseudo-ops

    All class methods prefixed with op_ are directly used as assembler directives.
    Every handler receives the directive token, its arguments and the parse context.
    """

    @classmethod
    def op_align(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Pad the current data section with zero bytes up to the next multiple of the argument

        NOTE(review): the argument is used as a byte count here, while the linked manual describes
        .align as taking a power-of-two exponent - confirm the intended semantics.
        """
        ASSERT_LEN(args, 1)
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
        align_to = parse_numeric_argument(args[0])
        current_mod = context.section.current_address() % align_to
        if current_mod == 0:
            return
        context.section.data += bytearray(align_to - current_mod)

    @classmethod
    def op_section(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Finalize the current section and open a new one named ``args[0]``

        The new section holds instructions if its base name is in INSTRUCTION_SECTION_NAMES, data otherwise.
        """
        ASSERT_LEN(args, 1)
        context.finalize_section()

        # Always start from a fresh section object: before the first directive context.section is
        # None (so its attributes cannot be assigned), and afterwards the old one was just finalized.
        section = CurrentSection()
        if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES:
            section.type = MemorySectionType.Instructions
            section.data = list()
        else:
            section.type = MemorySectionType.Data
            section.data = bytearray()
        section.name = args[0]
        context.section = section

    @classmethod
    def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext):
        """Mark the label ``args[0]`` as global."""
        ASSERT_LEN(args, 1)
        context.program.global_labels.add(args[0])

    @classmethod
    def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext):
        """Define the label ``args[0]`` with the numeric value ``args[1]``."""
        ASSERT_LEN(args, 2)
        name = args[0]
        value = parse_numeric_argument(args[1])
        context.context.labels[name] = value

    @classmethod
    def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext):
        """Append ``args[0]`` zero bytes to the current data section."""
        ASSERT_LEN(args, 1)
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
        size = parse_numeric_argument(args[0])
        cls.add_bytes(size, bytearray(size), context)

    @classmethod
    def add_bytes(cls, size: int, content: Union[None, int, bytearray], context: ParseContext):
        """
        Append bytes to the current data section

        :param size: number of bytes to add when ``content`` is None or an int
        :param content: None for zero bytes, an int to be encoded in ``size`` bytes, or the bytes themselves
        :param context: the parse context
        :raise ParseException: if the parser is not inside a data section
        """
        ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)

        if content is None:
            content = bytearray(size)
        elif isinstance(content, int):
            # int_to_bytes is the project helper the previous parser used to encode .word values
            from .helpers import int_to_bytes
            content = int_to_bytes(content, size)
        # actually store the bytes - without this the directive had no effect at all
        context.section.data += content

    @classmethod
    def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True):
        """
        Append ascii encoded text to the current data section

        :param text: the text to add
        :param context: the parse context
        :param zero_terminate: whether a terminating zero byte is appended
        """
        encoded_bytes = bytearray(text.encode('ascii'))
        if zero_terminate:
            encoded_bytes += bytearray(1)
        cls.add_bytes(len(encoded_bytes), encoded_bytes, context)

    @classmethod
    def handle_instruction(cls, token: Token, args: Tuple[str], context: ParseContext):
        """
        Dispatch an assembler directive

        Tried in order: an op_<name> method, section shorthands (.text, .data, ...), string
        directives, data directives listed in DATA_OP_SIZES. Unknown directives only print a warning.
        """
        # drop the first character of the token value (the leading '.' of e.g. '.word')
        op = token.value[1:]
        if hasattr(cls, 'op_' + op):
            getattr(cls, 'op_' + op)(token, args, context)
        elif op in ('text', 'data', 'rodata', 'bss', 'sbss'):
            cls.op_section(token, (token.value,), context)
        elif op in ('string', 'asciiz', 'asciz', 'ascii'):
            ASSERT_LEN(args, 1)
            # .ascii is the only variant WITHOUT a terminating zero byte
            cls.add_text(args[0], context, op != 'ascii')
        elif op in DATA_OP_SIZES:
            size = DATA_OP_SIZES[op]
            for arg in args:
                cls.add_bytes(size, parse_numeric_argument(arg), context)
        else:
            print(FMT_PARSE + "Unknown assembler directive: {} {} in {}".format(token, args, context) + FMT_NONE)
|
||||
|
||||
|
||||
# Maps the name of each data directive (without the leading dot) to the size in bytes of one value
DATA_OP_SIZES = {
    'byte': 1,
    # two-byte values
    '2byte': 2,
    'half': 2,
    'short': 2,
    # four-byte values
    '4byte': 4,
    'word': 4,
    'long': 4,
    # eight-byte values
    '8byte': 8,
    'dword': 8,
    'quad': 8,
}
|
@ -0,0 +1,186 @@
|
||||
"""
|
||||
RiscEmu (c) 2021 Anton Lydike
|
||||
|
||||
SPDX-License-Identifier: MIT
|
||||
|
||||
This file contains base classes which represent loaded programs
|
||||
"""
|
||||
|
||||
import re
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Set

from .helpers import *
|
||||
|
||||
# Address relative to the start of a block / section
T_RelativeAddress = int
# Address in the emulated address space
T_AbsoluteAddress = int

# Matches references to numbered labels: digits followed by a direction, 'f' or 'b' (e.g. "1f", "12b")
NUMBER_SYMBOL_PATTERN = re.compile(r'^\d+[fb]$')
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MemoryFlags:
    """Immutable access flags of a memory section."""
    read_only: bool   # shown as 'ro' when set, 'rw' otherwise
    executable: bool  # shown as 'x' when set, '-' otherwise

    def __repr__(self):
        """Render the flags compactly, e.g. ``MemoryFlags(rw,-)``."""
        rendered = ('ro' if self.read_only else 'rw', 'x' if self.executable else '-')
        return "{}({},{})".format(type(self).__name__, *rendered)
|
||||
|
||||
|
||||
class InstructionContext:
    """Label tables shared by the instructions of one block, used to resolve symbols to addresses."""

    base_address: T_AbsoluteAddress
    """
    The address where the instruction block is placed
    """

    labels: Dict[str, T_RelativeAddress]
    """
    This dictionary maps all labels to their relative position of the instruction block
    """

    numbered_labels: Dict[str, List[T_RelativeAddress]]
    """
    This dictionary maps numbered labels (which can occur multiple times) to a list of (block-relative) addresses where
    the label was placed
    """

    def __init__(self):
        """Start with empty label tables and a base address of 0."""
        self.base_address = 0
        self.labels = {}
        self.numbered_labels = defaultdict(list)

    def resolve_label(self, symbol: str, address_at: Optional[T_RelativeAddress] = None) -> Optional[T_RelativeAddress]:
        """
        Resolve a label to its block-relative address

        :param symbol: a label name, or a numbered reference such as '1f' (forward) / '1b' (backward)
        :param address_at: the address the reference is made from, required for numbered references
        :return: the address, or None if the label is unknown
        :raise ParseException: if a numbered reference is given without ``address_at``
        """
        if not NUMBER_SYMBOL_PATTERN.match(symbol):
            return self.labels.get(symbol, None)

        if address_at is None:
            raise ParseException("Cannot resolve relative symbol {} without an address!".format(symbol))

        number, direction = symbol[:-1], symbol[-1]
        candidates = self.numbered_labels.get(number, [])
        if direction == 'b':
            # backward: the closest placement strictly before the reference
            return max((addr for addr in candidates if addr < address_at), default=None)
        # forward: the closest placement strictly after the reference
        return min((addr for addr in candidates if addr > address_at), default=None)
|
||||
|
||||
|
||||
class Instruction(ABC):
    """
    Abstract base of all instructions: a name plus a tuple of arguments.

    Subclasses implement the accessors that interpret an argument as immediate, immediate(register)
    or register.
    """
    name: str
    args: tuple

    @abstractmethod
    def get_imm(self, num: int) -> int:
        """
        parse and get immediate argument

        :param num: index of the argument
        """
        pass

    @abstractmethod
    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        parse and get an argument imm(reg)

        :param num: index of the argument
        """
        pass

    @abstractmethod
    def get_reg(self, num: int) -> str:
        """
        parse and get an register argument

        :param num: index of the argument
        """
        pass

    def __repr__(self):
        """Render as ``name arg0, arg1, ...``."""
        return "{} {}".format(self.name, ", ".join(self.args))
|
||||
|
||||
|
||||
@dataclass
class MemorySection(ABC):
    """
    Abstract base of all memory sections.

    Offsets passed to the accessors are relative to ``base``.
    """
    name: str
    flags: MemoryFlags
    size: int
    base: T_AbsoluteAddress
    owner: str
    context: InstructionContext

    @abstractmethod
    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """Read ``size`` bytes starting at ``offset``."""
        pass

    @abstractmethod
    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """Write ``size`` bytes of ``data`` starting at ``offset``."""
        pass

    @abstractmethod
    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """Read the instruction located at ``offset``."""
        pass

    def dump(self, start: T_RelativeAddress, end: Optional[T_RelativeAddress], fmt: str = 'hex',
             bytes_per_row: int = 16, rows: int = 10, group: int = 4):
        """
        Print a view of the section to stdout

        :param start: first offset to show; if ``end`` is None, the offset to centre the view on and highlight
        :param end: offset to stop at (exclusive), or None to show ``rows`` rows around ``start``
        :param fmt: format passed on to format_bytes
        :param bytes_per_row: bytes printed per row (forced to 4 for executable sections)
        :param rows: number of rows shown when ``end`` is None
        :param group: number of bytes per group, passed on to format_bytes
        """
        if self.flags.executable:
            bytes_per_row = 4
        highlight = None
        if end is None:
            end = start + (bytes_per_row * (rows // 2))
            highlight = start
            start = start - (bytes_per_row * (rows // 2))
        # keep the window inside the section: centring on an address near either end would otherwise
        # produce negative offsets or reads past the end
        start = max(start, 0)
        end = min(end, self.size)

        if self.flags.executable:
            print(FMT_MEM + "{}, viewing {} instructions:".format(
                self, (end - start) // 4
            ) + FMT_NONE)

            for addr in range(start, end, 4):
                if addr == highlight:
                    print(FMT_UNDERLINE + FMT_ORANGE, end='')
                print("0x{:x}: {}{}".format(
                    self.base + addr, self.read_ins(addr), FMT_NONE
                ))
        else:
            print(FMT_MEM + "{}, viewing {} bytes:".format(
                self, (end - start)
            ) + FMT_NONE)

            for addr in range(start, end, bytes_per_row):
                # the last row may be shorter than bytes_per_row
                row = self.read(addr, min(bytes_per_row, self.size - addr))
                # highlight is None when an explicit end was given, so only compute the group index
                # for the row that actually contains the highlighted offset
                if highlight is not None and addr <= highlight < addr + bytes_per_row:
                    text = format_bytes(row, fmt, group, (highlight - addr) // group)
                else:
                    text = format_bytes(row, fmt, group)
                print("0x{:x}: {}{}".format(self.base + addr, text, FMT_NONE))

    def __repr__(self):
        """Describe the section by name, base address, size, flags and owner."""
        return "{}[{}] at 0x{:08X} (size={}bytes, flags={}, owner={})".format(
            self.__class__.__name__,
            self.name,
            self.base,
            self.size,
            self.flags,
            self.owner
        )
|
||||
|
||||
|
||||
class Program:
    """A named collection of memory sections together with its labels."""
    name: str
    context: InstructionContext
    global_labels: Set[str]
    sections: List[MemorySection]
    base: T_AbsoluteAddress = 0

    def __init__(self, name: str, base: int = 0):
        """
        :param name: name of the program
        :param base: base address of the program
        """
        self.name, self.base = name, base
        self.context = InstructionContext()
        self.global_labels = set()
        self.sections = []

    def add_section(self, sec: MemorySection):
        """Append ``sec`` to the list of sections."""
        self.sections.append(sec)

    def __repr__(self):
        """Render name, context, global labels, section names and base."""
        section_names = [section.name for section in self.sections]
        return "{}(name={},context={},globals={},sections={},base={})".format(
            type(self).__name__, self.name, self.context, self.global_labels, section_names, self.base
        )
|
@ -0,0 +1,79 @@
|
||||
"""
|
||||
RiscEmu (c) 2021 Anton Lydike
|
||||
|
||||
SPDX-License-Identifier: MIT
|
||||
"""
|
||||
import re
|
||||
from typing import Dict, Tuple, Iterable, Callable
|
||||
|
||||
from helpers import Peekable
|
||||
from .assembler import MemorySectionType, ParseContext, AssemblerDirectives
|
||||
from .base_types import Program
|
||||
from .colors import FMT_PARSE
|
||||
from .exceptions import ParseException
|
||||
from .tokenizer import Token, TokenType
|
||||
from .types import SimpleInstruction
|
||||
|
||||
|
||||
def parse_instruction(token: Token, args: Tuple[str], context: ParseContext):
    """
    Append an instruction to the current instruction section

    :param token: the instruction name token
    :param args: the arguments of the instruction
    :param context: the parse context
    :raise ParseException: if the parser is not inside an instruction section
    """
    section = context.section
    in_instruction_section = section is not None and section.type == MemorySectionType.Instructions
    if not in_instruction_section:
        raise ParseException("{} {} encountered in invalid context: {}".format(token, args, context))
    # the instruction remembers the address it is placed at
    section.data.append(SimpleInstruction(token.value, args, context.context, section.current_address()))
|
||||
|
||||
|
||||
def parse_label(token: Token, args: Tuple[str], context: ParseContext):
    """
    Record a label at the current address of the current section

    Purely numeric labels may be defined multiple times and are collected per number; all other
    labels are unique, redefining one prints a warning and overwrites the old address.

    :param token: the label token, its value ends with a one-character suffix that is dropped
    :param args: unused
    :param context: the parse context
    """
    label = token.value[:-1]
    tables = context.context
    if re.match(r'^\d+$', label):
        # relative label:
        tables.numbered_labels[label].append(context.section.current_address())
        return
    if label in tables.labels:
        print(FMT_PARSE + 'Warn: Symbol {} defined twice!'.format(label))
    tables.labels[label] = context.section.current_address()
|
||||
|
||||
|
||||
# Maps the type of the first token of a statement to the function handling that statement.
# Every handler is called as handler(token, args, context).
PARSERS: Dict[TokenType, Callable[[Token, Tuple[str], ParseContext], None]] = {
    # assembler directives
    TokenType.PSEUDO_OP: AssemblerDirectives.handle_instruction,
    # label definitions
    TokenType.LABEL: parse_label,
    # instructions
    TokenType.INSTRUCTION_NAME: parse_instruction,
}
|
||||
|
||||
|
||||
def parse_tokens(name: str, tokens_iter: Iterable[Token]) -> Program:
    """
    Parse a stream of tokens into a Program

    :param name: name of the resulting program
    :param tokens_iter: the tokens to parse
    :return: the finalized program
    :raise ParseException: if a statement starts with a token type that has no parser
    """
    context = ParseContext(name)
    statements = composite_tokenizer(Peekable[Token](tokens_iter))

    for token, args in statements:
        handler = PARSERS.get(token.type)
        if handler is None:
            raise ParseException("Unexpected token type: {}, {}".format(token, args))
        handler(token, args, context)

    return context.finalize()
|
||||
|
||||
|
||||
def composite_tokenizer(tokens_iter: Iterable[Token]) -> Iterable[Tuple[Token, Tuple[str]]]:
    """
    Group a flat token stream into statements

    Yields (token, arguments) for every pseudo-op, label and instruction name token; tokens of any
    other type that appear where a statement should start are skipped.

    :param tokens_iter: the tokens to group
    """
    stream = Peekable[Token](tokens_iter)
    statement_starts = (TokenType.PSEUDO_OP, TokenType.LABEL, TokenType.INSTRUCTION_NAME)

    while not stream.is_empty():
        head = next(stream)
        if head.type not in statement_starts:
            continue
        yield head, tuple(take_arguments(stream))
|
||||
|
||||
|
||||
def take_arguments(tokens: Peekable[Token]) -> Iterable[str]:
    """
    Consume (argument comma)* and yield each argument's value until a newline is reached.

    The newline that terminates the argument list is consumed. If the token stream ends
    before a newline is seen, the argument list simply ends there.

    :param tokens: A Peekable iterator over some Tokens
    :raises ParseException: if the token at separator position is neither a comma nor a
        newline (e.g. an argument directly followed by another argument)
    """
    # Checking is_empty() up front avoids peeking at / advancing an exhausted stream.
    while not tokens.is_empty():
        if tokens.peek().type == TokenType.ARGUMENT:
            yield next(tokens).value
            if tokens.is_empty():
                # input ended right after the last argument (no trailing newline)
                return

        separator = tokens.peek()
        if separator.type == TokenType.COMMA:
            next(tokens)
        elif separator.type == TokenType.NEWLINE:
            next(tokens)
            return
        else:
            # only reject tokens that are genuinely unexpected; commas must keep looping
            raise ParseException("Expected newline, instead got {}".format(separator))
|
||||
|
@ -0,0 +1,72 @@
|
||||
from typing import List, Tuple
|
||||
from .exceptions import MemoryAccessException
|
||||
from .helpers import parse_numeric_argument
|
||||
from .base_types import Instruction, MemorySection, MemoryFlags, InstructionContext, T_RelativeAddress, \
|
||||
T_AbsoluteAddress
|
||||
|
||||
|
||||
class SimpleInstruction(Instruction):
    """
    An instruction stored as its name plus its raw argument strings.

    Arguments are interpreted lazily by the ``get_*`` accessors.
    """

    def __init__(self, name: str, args: Tuple[str], context: InstructionContext, addr: T_RelativeAddress):
        """
        :param name: the instruction name
        :param args: the raw argument strings
        :param context: the context used to resolve labels appearing in arguments
        :param addr: the address passed to label resolution for this instruction
        """
        self.context, self.name, self.args, self.addr = context, name, args, addr

    def get_imm(self, num: int) -> int:
        """
        Return argument ``num`` as an immediate value.

        The argument is first looked up as a label; if the context returns ``None`` it is
        parsed as a numeric literal instead.
        """
        operand = self.args[num]
        resolved = self.context.resolve_label(operand, self.addr)
        return parse_numeric_argument(operand) if resolved is None else resolved

    def get_imm_reg(self, num: int) -> Tuple[int, str]:
        """
        Return ``(immediate, register)`` where the register is argument ``num`` and the
        immediate is argument ``num + 1``.
        """
        immediate = self.get_imm(num + 1)
        register = self.get_reg(num)
        return immediate, register

    def get_reg(self, num: int) -> str:
        """Return argument ``num`` unchanged, as a register name."""
        return self.args[num]
|
||||
|
||||
|
||||
class InstructionMemorySection(MemorySection):
    """
    A memory section holding instruction objects instead of raw bytes.

    Each instruction occupies four bytes of address space, so the instruction at list
    index ``i`` lives at section offset ``4 * i``. Raw byte reads and writes are rejected.
    """

    def __init__(self, instructions: List[Instruction], name: str, context: InstructionContext, base: int = 0):
        """
        :param instructions: the instructions stored in this section
        :param name: the section name
        :param context: the instruction context associated with this section
        :param base: the base address of the section (default 0)
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(instructions) * 4
        # NOTE(review): presumably (read_only=True, executable=True) - confirm against MemoryFlags
        self.flags = MemoryFlags(True, True)
        self.instructions = instructions

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """Always raises: instruction sections have no raw byte representation."""
        raise MemoryAccessException("Cannot read raw bytes from instruction section", self.base + offset, size, 'read')

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """Always raises: instruction sections cannot be written as raw bytes."""
        raise MemoryAccessException("Cannot write raw bytes to instruction section", self.base + offset, size, 'write')

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """
        Fetch the instruction at section offset ``offset``.

        :param offset: section-relative byte offset; must be a multiple of four
        :raises MemoryAccessException: if the offset is unaligned or lies outside the section
        """
        if offset % 4 != 0:
            raise MemoryAccessException("Unaligned instruction fetch!", self.base + offset, 4, 'instruction fetch')

        index = offset // 4
        # Explicit bounds check: a negative index would otherwise wrap around to the end of
        # the list, and a too-large one would surface as a bare IndexError.
        if not 0 <= index < len(self.instructions):
            raise MemoryAccessException("Out of bounds access in {}".format(self),
                                        self.base + offset, 4, 'instruction fetch')
        return self.instructions[index]
|
||||
|
||||
|
||||
class BinaryDataMemorySection(MemorySection):
    """
    A memory section backed by a ``bytearray``.

    Supports raw byte reads and writes within the section bounds; instruction fetches are
    rejected.
    """

    def __init__(self, data: bytearray, name: str, context: InstructionContext, base: int = 0):
        """
        :param data: the backing bytes; its length fixes the section size
        :param name: the section name
        :param context: the instruction context associated with this section
        :param base: the base address of the section (default 0)
        """
        self.name = name
        self.base = base
        self.context = context
        self.size = len(data)
        # NOTE(review): presumably (read_only=False, executable=False) - confirm against MemoryFlags
        self.flags = MemoryFlags(False, False)
        self.data = data

    # NOTE(review): the exceptions below report the section-relative offset, whereas
    # InstructionMemorySection reports base + offset - confirm which one is intended.

    def read(self, offset: T_RelativeAddress, size: int) -> bytearray:
        """
        Return a copy of ``size`` bytes starting at section offset ``offset``.

        :raises MemoryAccessException: if the requested range is not fully inside the section
        """
        # Negative values must be rejected explicitly: slicing would silently reinterpret them.
        if offset < 0 or size < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'read')
        return self.data[offset:offset + size]

    def write(self, offset: T_RelativeAddress, size: int, data: bytearray):
        """
        Overwrite ``size`` bytes at section offset ``offset`` with the first ``size`` bytes
        of ``data``.

        :raises MemoryAccessException: if the target range is not fully inside the section,
            or if ``data`` holds fewer than ``size`` bytes
        """
        # A negative offset would turn the slice assignment below into an insertion that
        # changes the length of the backing bytearray.
        if offset < 0 or size < 0 or offset + size > self.size:
            raise MemoryAccessException("Out of bounds access in {}".format(self), offset, size, 'write')

        chunk = data[0:size]
        if len(chunk) != size:
            raise MemoryAccessException("Invalid write parameter sizing", offset, size, 'write')
        self.data[offset:offset + size] = chunk

    def read_ins(self, offset: T_RelativeAddress) -> Instruction:
        """Always raises: data sections hold no instructions."""
        raise MemoryAccessException("Tried reading instruction on non-executable section {}".format(self),
                                    offset, 4, 'instruction fetch')
|
Loading…
Reference in New Issue