You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
127 lines
4.1 KiB
Python
127 lines
4.1 KiB
Python
from unittest import TestCase
|
|
|
|
from riscemu.tokenizer import tokenize, print_tokens, Token, TokenType, NEWLINE, COMMA, \
|
|
split_whitespace_respecting_quotes
|
|
|
|
|
|
def ins(name: str) -> Token:
    """Shorthand: build an INSTRUCTION_NAME token carrying *name*."""
    kind = TokenType.INSTRUCTION_NAME
    return Token(kind, name)
|
|
|
|
|
|
def arg(name: str) -> Token:
    """Shorthand: build an ARGUMENT token carrying *name*."""
    kind = TokenType.ARGUMENT
    return Token(kind, name)
|
|
|
|
|
|
def op(name: str) -> Token:
    """Shorthand: build a PSEUDO_OP token carrying *name*."""
    kind = TokenType.PSEUDO_OP
    return Token(kind, name)
|
|
|
|
|
|
def lbl(name: str) -> Token:
    """Shorthand: build a LABEL token carrying *name*."""
    kind = TokenType.LABEL
    return Token(kind, name)
|
|
|
|
|
|
class TestTokenizer(TestCase):
    """Tests for riscemu's tokenizer and its quote-aware whitespace splitter."""

    # --- shared assertion helpers (not collected by unittest: no test_ prefix) ---

    def assert_tokens(self, lines, expected):
        """Tokenize *lines* and compare the resulting token list to *expected*."""
        self.assertEqual(list(tokenize(lines)), expected)

    def assert_split(self, text, expected):
        """Split *text* respecting quotes and compare the parts to *expected*."""
        self.assertEqual(list(split_whitespace_respecting_quotes(text)), expected)

    # --- tokenizer behaviour ---

    def test_instructions(self):
        """Plain instructions become INSTRUCTION_NAME + ARGUMENT/COMMA runs."""
        source = [
            'li a0, 144',
            'divi a0, a0, 12',
            'xori a1, a0, 12',
        ]
        expected = [
            ins('li'), arg('a0'), COMMA, arg('144'), NEWLINE,
            ins('divi'), arg('a0'), COMMA, arg('a0'), COMMA, arg('12'), NEWLINE,
            ins('xori'), arg('a1'), COMMA, arg('a0'), COMMA, arg('12'), NEWLINE,
        ]
        self.assert_tokens(source, expected)

    def test_comments(self):
        """All three comment markers ('#', '//', ';') are stripped entirely."""
        expected = [ins('li'), arg('a0'), COMMA, arg('144'), NEWLINE]
        for marker in ('#', '//', ';'):
            source = [
                marker + ' this is a comment',
                'li a0, 144',
            ]
            self.assert_tokens(source, expected)

    def test_pseudo_ins(self):
        """Dot-directives tokenize as PSEUDO_OP followed by their arguments."""
        expected = [
            op('.section'), arg('.text'), NEWLINE,
            op('.type'), arg('init'), COMMA, arg('@function'), NEWLINE,
        ]
        source = [
            '.section .text',
            '.type init, @function',
        ]
        self.assert_tokens(source, expected)

    def test_full_program(self):
        """A small program mixing comments, directives, labels and memory ops."""
        program = """
# a hashtag comment

; semicolon comment followed by an empty line
.section .text
// double slash comment
addi sp, sp, -32
sw s0, 0(ra)
section:
sub s0, s0, s0
"""
        expected = [
            op('.section'), arg('.text'), NEWLINE,
            ins('addi'), arg('sp'), COMMA, arg('sp'), COMMA, arg('-32'), NEWLINE,
            # note: 'sw s0, 0(ra)' yields the base register before the offset
            ins('sw'), arg('s0'), COMMA, arg('ra'), arg('0'), NEWLINE,
            lbl('section:'), NEWLINE,
            ins('sub'), arg('s0'), COMMA, arg('s0'), COMMA, arg('s0'), NEWLINE,
        ]
        self.assert_tokens(program.splitlines(), expected)

    # --- split_whitespace_respecting_quotes behaviour ---

    def test_split_whitespace_respecting_quotes_single(self):
        self.assert_split("test", ["test"])

    def test_split_whitespace_respecting_quotes_empty(self):
        self.assert_split("", [])

    def test_split_whitespace_respecting_quotes_two_parts(self):
        self.assert_split("test 123", ["test", "123"])

    def test_split_whitespace_respecting_quotes_whole_quoted(self):
        self.assert_split("'test 123'", ["test 123"])

    def test_split_whitespace_respecting_quotes_double_quotes(self):
        self.assert_split('"test 123"', ["test 123"])

    def test_split_whitespace_respecting_quotes_quoted_then_normal(self):
        self.assert_split('"test 123" abc', ["test 123", "abc"])

    def test_split_whitespace_respecting_quotes_quoted_sorrounded(self):
        self.assert_split('hello "test 123" abc', ["hello", "test 123", "abc"])

    def test_split_whitespace_respecting_quotes_weird_spaces(self):
        # a tab also counts as a separator
        self.assert_split('hello "test 123"\tabc', ["hello", "test 123", "abc"])

    def test_split_whitespace_respecting_quotes_quotes_no_spaces(self):
        # quoted section splits even with no surrounding whitespace
        self.assert_split('hello"test 123"abc', ["hello", "test 123", "abc"])
|