You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
257 lines
8.1 KiB
Python
257 lines
8.1 KiB
Python
from enum import Enum, auto
|
|
from typing import List
|
|
from typing import Optional, Tuple, Union
|
|
|
|
from .colors import FMT_PARSE, FMT_NONE
|
|
from riscemu.types.exceptions import ParseException, ASSERT_LEN
|
|
from .helpers import parse_numeric_argument, align_addr, get_section_base_name
|
|
from .tokenizer import Token
|
|
from .types import (
|
|
Program,
|
|
T_RelativeAddress,
|
|
InstructionContext,
|
|
Instruction,
|
|
BinaryDataMemorySection,
|
|
InstructionMemorySection,
|
|
Int32,
|
|
)
|
|
|
|
INSTRUCTION_SECTION_NAMES = (".text", ".init", ".fini")
|
|
"""
|
|
A tuple containing all section names which contain executable code (instead of data)
|
|
|
|
The first segment of each segment (first segment of ".text.main" is ".text") is checked
|
|
against this list to determine the type of it.
|
|
"""
|
|
|
|
|
|
class MemorySectionType(Enum):
|
|
Data = auto()
|
|
Instructions = auto()
|
|
|
|
|
|
class CurrentSection:
|
|
name: str
|
|
data: Union[List[Instruction], bytearray]
|
|
type: MemorySectionType
|
|
base: int
|
|
|
|
def __init__(self, name: str, type: MemorySectionType, base: int = 0):
|
|
self.name = name
|
|
self.type = type
|
|
self.base = base
|
|
if self.type == MemorySectionType.Data:
|
|
self.data = bytearray()
|
|
elif self.type == MemorySectionType.Instructions:
|
|
self.data = list()
|
|
else:
|
|
raise ParseException("Unknown section type: {}".format(type))
|
|
|
|
def current_address(self) -> T_RelativeAddress:
|
|
if self.type == MemorySectionType.Data:
|
|
return len(self.data) + self.base
|
|
return len(self.data) * 4 + self.base
|
|
|
|
def __repr__(self):
|
|
return "{}(name={},data={},type={})".format(
|
|
self.__class__.__name__, self.name, self.data, self.type.name
|
|
)
|
|
|
|
|
|
class ParseContext:
|
|
section: Optional[CurrentSection]
|
|
context: InstructionContext
|
|
program: Program
|
|
|
|
def __init__(self, name: str):
|
|
self.program = Program(name)
|
|
self.context = self.program.context
|
|
self.section = None
|
|
|
|
def finalize(self) -> Program:
|
|
self._finalize_section()
|
|
return self.program
|
|
|
|
def _finalize_section(self):
|
|
if self.section is None:
|
|
return
|
|
|
|
if self.section.type == MemorySectionType.Data:
|
|
section = BinaryDataMemorySection(
|
|
self.section.data,
|
|
self.section.name,
|
|
self.context,
|
|
self.program.name,
|
|
self.section.base,
|
|
)
|
|
self.program.add_section(section)
|
|
elif self.section.type == MemorySectionType.Instructions:
|
|
section = InstructionMemorySection(
|
|
self.section.data,
|
|
self.section.name,
|
|
self.context,
|
|
self.program.name,
|
|
self.section.base,
|
|
)
|
|
self.program.add_section(section)
|
|
|
|
self.section = None
|
|
|
|
def new_section(self, name: str, type: MemorySectionType, alignment: int = 4):
|
|
base = align_addr(self.current_address(), alignment)
|
|
|
|
self._finalize_section()
|
|
self.section = CurrentSection(name, type, base)
|
|
|
|
def add_label(
|
|
self, name: str, value: int, is_global: bool = False, is_relative: bool = False
|
|
):
|
|
self.context.labels[name] = value
|
|
if is_global:
|
|
self.program.global_labels.add(name)
|
|
if is_relative:
|
|
self.program.relative_labels.add(name)
|
|
|
|
def current_address(self):
|
|
if self.section:
|
|
return self.section.current_address()
|
|
return self.program.base if self.program.base is not None else 0
|
|
|
|
def __repr__(self):
|
|
return "{}(\n\tsetion={},\n\tprogram={}\n)".format(
|
|
self.__class__.__name__, self.section, self.program
|
|
)
|
|
|
|
|
|
def ASSERT_IN_SECTION_TYPE(context: ParseContext, type: MemorySectionType):
|
|
if context.section is None:
|
|
raise ParseException(
|
|
"Error, expected to be in {} section, but no section is present...".format(
|
|
type.name
|
|
)
|
|
)
|
|
if context.section.type != type:
|
|
raise ParseException(
|
|
"Error, expected to be in {} section, but currently in {}...".format(
|
|
type.name, context.section
|
|
)
|
|
)
|
|
|
|
|
|
class AssemblerDirectives:
|
|
"""
|
|
This class represents a collection of all assembler directives as documented by
|
|
https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md#pseudo-ops
|
|
|
|
All class methods prefixed with op_ are directly used as assembler directives.
|
|
"""
|
|
|
|
@classmethod
|
|
def op_align(cls, token: Token, args: Tuple[str], context: ParseContext):
|
|
ASSERT_LEN(args, 1)
|
|
ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
|
|
align_to = parse_numeric_argument(args[0])
|
|
current_mod = context.current_address() % align_to
|
|
if current_mod == 0:
|
|
return
|
|
context.section.data += bytearray(align_to - current_mod)
|
|
|
|
@classmethod
|
|
def op_section(cls, token: Token, args: Tuple[str], context: ParseContext):
|
|
ASSERT_LEN(args, 1)
|
|
if get_section_base_name(args[0]) in INSTRUCTION_SECTION_NAMES:
|
|
context.new_section(args[0], MemorySectionType.Instructions)
|
|
else:
|
|
context.new_section(args[0], MemorySectionType.Data)
|
|
|
|
@classmethod
|
|
def op_globl(cls, token: Token, args: Tuple[str], context: ParseContext):
|
|
ASSERT_LEN(args, 1)
|
|
context.program.global_labels.add(args[0])
|
|
|
|
@classmethod
|
|
def op_global(cls, token: Token, args: Tuple[str], context: ParseContext):
|
|
cls.op_globl(token, args, context)
|
|
|
|
@classmethod
|
|
def op_equ(cls, token: Token, args: Tuple[str], context: ParseContext):
|
|
ASSERT_LEN(args, 2)
|
|
name = args[0]
|
|
value = parse_numeric_argument(args[1])
|
|
context.context.labels[name] = value
|
|
|
|
@classmethod
|
|
def op_space(cls, token: Token, args: Tuple[str], context: ParseContext):
|
|
ASSERT_LEN(args, 1)
|
|
ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
|
|
|
|
size = parse_numeric_argument(args[0])
|
|
cls.add_bytes(size, None, context)
|
|
|
|
@classmethod
|
|
def op_zero(cls, token: Token, args: Tuple[str], context: ParseContext):
|
|
ASSERT_LEN(args, 1)
|
|
ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
|
|
size = parse_numeric_argument(args[0])
|
|
cls.add_bytes(size, bytearray(size), context)
|
|
|
|
@classmethod
|
|
def add_bytes(
|
|
cls, size: int, content: Union[None, int, bytearray], context: ParseContext
|
|
):
|
|
ASSERT_IN_SECTION_TYPE(context, MemorySectionType.Data)
|
|
|
|
if content is None:
|
|
content = bytearray(size)
|
|
if isinstance(content, int):
|
|
content = Int32(content).to_bytes(size)
|
|
|
|
context.section.data += content
|
|
|
|
@classmethod
|
|
def add_text(cls, text: str, context: ParseContext, zero_terminate: bool = True):
|
|
# replace '\t' and '\n' escape sequences
|
|
text = text.replace("\\n", "\n").replace("\\t", "\t")
|
|
|
|
encoded_bytes = bytearray(text.encode("ascii"))
|
|
if zero_terminate:
|
|
encoded_bytes += bytearray(1)
|
|
cls.add_bytes(len(encoded_bytes), encoded_bytes, context)
|
|
|
|
@classmethod
|
|
def handle_instruction(cls, token: Token, args: Tuple[str], context: ParseContext):
|
|
op = token.value[1:]
|
|
if hasattr(cls, "op_" + op):
|
|
getattr(cls, "op_" + op)(token, args, context)
|
|
elif op in ("text", "data", "rodata", "bss", "sbss"):
|
|
cls.op_section(token, (token.value,), context)
|
|
elif op in ("string", "asciiz", "asciz", "ascii"):
|
|
ASSERT_LEN(args, 1)
|
|
cls.add_text(args[0], context, zero_terminate=(op != "ascii"))
|
|
elif op in DATA_OP_SIZES:
|
|
size = DATA_OP_SIZES[op]
|
|
for arg in args:
|
|
cls.add_bytes(size, parse_numeric_argument(arg), context)
|
|
else:
|
|
print(
|
|
FMT_PARSE
|
|
+ "Unknown assembler directive: {} {} in {}".format(
|
|
token, args, context
|
|
)
|
|
+ FMT_NONE
|
|
)
|
|
|
|
|
|
DATA_OP_SIZES = {
|
|
"byte": 1,
|
|
"2byte": 2,
|
|
"half": 2,
|
|
"short": 2,
|
|
"4byte": 4,
|
|
"word": 4,
|
|
"long": 4,
|
|
"8byte": 8,
|
|
"dword": 8,
|
|
"quad": 8,
|
|
}
|