You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
riscemu/test/test_tokenizer.py

162 lines
4.4 KiB
Python

from unittest import TestCase
from riscemu.tokenizer import (
tokenize,
print_tokens,
Token,
TokenType,
NEWLINE,
COMMA,
split_whitespace_respecting_quotes,
)
def ins(name: str) -> Token:
    """Shorthand for ``Token(TokenType.INSTRUCTION_NAME, name)``.

    Keeps the expected-token lists in the tests below short.
    """
    token_type = TokenType.INSTRUCTION_NAME
    return Token(token_type, name)
def arg(name: str) -> Token:
    """Shorthand for ``Token(TokenType.ARGUMENT, name)``.

    Keeps the expected-token lists in the tests below short.
    """
    token_type = TokenType.ARGUMENT
    return Token(token_type, name)
def op(name: str) -> Token:
    """Shorthand for ``Token(TokenType.PSEUDO_OP, name)``.

    Keeps the expected-token lists in the tests below short.
    """
    token_type = TokenType.PSEUDO_OP
    return Token(token_type, name)
def lbl(name: str) -> Token:
    """Shorthand for ``Token(TokenType.LABEL, name)``.

    Keeps the expected-token lists in the tests below short.
    """
    token_type = TokenType.LABEL
    return Token(token_type, name)
class TestTokenizer(TestCase):
    """Tests for ``tokenize`` and ``split_whitespace_respecting_quotes``.

    Expected token streams are written with the module-level ``ins``,
    ``arg``, ``op`` and ``lbl`` helpers together with the ``COMMA`` and
    ``NEWLINE`` constants imported from the tokenizer.
    """

    def test_instructions(self):
        """Three plain instruction lines tokenize to the expected stream."""
        program = ["li a0, 144", "divi a0, a0, 12", "xori a1, a0, 12"]
        tokens = [
            ins("li"),
            arg("a0"),
            COMMA,
            arg("144"),
            NEWLINE,
            ins("divi"),
            arg("a0"),
            COMMA,
            arg("a0"),
            COMMA,
            arg("12"),
            NEWLINE,
            ins("xori"),
            arg("a1"),
            COMMA,
            arg("a0"),
            COMMA,
            arg("12"),
            NEWLINE,
        ]
        self.assertEqual(list(tokenize(program)), tokens)

    def test_comments(self):
        """A leading comment line yields no tokens for each comment prefix."""
        parsed_res = [ins("li"), arg("a0"), COMMA, arg("144"), NEWLINE]
        for c in ("#", "//", ";"):
            # subTest names the failing prefix and lets the remaining
            # prefixes run even when one of them fails.
            with self.subTest(comment_prefix=c):
                lines = [c + " this is a comment", "li a0, 144"]
                self.assertEqual(list(tokenize(lines)), parsed_res)

    def test_pseudo_ins(self):
        """Pseudo-op lines tokenize to a PSEUDO_OP followed by its arguments."""
        parsed_res = [
            op(".section"),
            arg(".text"),
            NEWLINE,
            op(".type"),
            arg("init"),
            COMMA,
            arg("@function"),
            NEWLINE,
        ]
        input_program = [".section .text", ".type init, @function"]
        self.assertEqual(list(tokenize(input_program)), parsed_res)

    def test_full_program(self):
        """A program mixing comments, a pseudo-op, instructions and a label.

        The label ``section:`` deliberately shares its name with the
        ``.section`` pseudo-op used earlier in the same program.
        """
        program = """
# a hashtag comment
; semicolon comment followed by an empty line
.section .text
// double slash comment
addi sp, sp, -32
sw s0, 0(ra)
section:
sub s0, s0, s0
"""
        tokens = [
            op(".section"),
            arg(".text"),
            NEWLINE,
            ins("addi"),
            arg("sp"),
            COMMA,
            arg("sp"),
            COMMA,
            arg("-32"),
            NEWLINE,
            ins("sw"),
            arg("s0"),
            COMMA,
            # The operand "0(ra)" is expected as two arguments: the
            # register first, then the offset.
            arg("ra"),
            arg("0"),
            NEWLINE,
            lbl("section:"),
            NEWLINE,
            ins("sub"),
            arg("s0"),
            COMMA,
            arg("s0"),
            COMMA,
            arg("s0"),
            NEWLINE,
        ]
        self.assertEqual(list(tokenize(program.splitlines())), tokens)

    def test_split_whitespace_respecting_quotes_single(self):
        """A single word comes back as a single part."""
        self.assertEqual(list(split_whitespace_respecting_quotes("test")), ["test"])

    def test_split_whitespace_respecting_quotes_empty(self):
        """An empty string produces no parts."""
        self.assertEqual(list(split_whitespace_respecting_quotes("")), [])

    def test_split_whitespace_respecting_quotes_two_parts(self):
        """Two space-separated words come back as two parts."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes("test 123")), ["test", "123"]
        )

    def test_split_whitespace_respecting_quotes_whole_quoted(self):
        """A single-quoted string is one part, returned without its quotes."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes("'test 123'")), ["test 123"]
        )

    def test_split_whitespace_respecting_quotes_double_quotes(self):
        """A double-quoted string is one part, returned without its quotes."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes('"test 123"')), ["test 123"]
        )

    def test_split_whitespace_respecting_quotes_quoted_then_normal(self):
        """A quoted part followed by a bare word gives two parts."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes('"test 123" abc')),
            ["test 123", "abc"],
        )

    def test_split_whitespace_respecting_quotes_quoted_sorrounded(self):
        """A quoted part between two bare words gives three parts."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes('hello "test 123" abc')),
            ["hello", "test 123", "abc"],
        )

    def test_split_whitespace_respecting_quotes_weird_spaces(self):
        """A tab after the closing quote separates parts like a space does."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes('hello "test 123"\tabc')),
            ["hello", "test 123", "abc"],
        )

    def test_split_whitespace_respecting_quotes_quotes_no_spaces(self):
        """Quotes split parts even with no whitespace around them."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes('hello"test 123"abc')),
            ["hello", "test 123", "abc"],
        )