"""Unit tests for the ``riscemu.tokenizer`` module."""

from unittest import TestCase

from riscemu.tokenizer import (
    tokenize,
    print_tokens,
    Token,
    TokenType,
    NEWLINE,
    COMMA,
    split_whitespace_respecting_quotes,
)


def ins(name: str) -> Token:
    """Return a ``TokenType.INSTRUCTION_NAME`` token carrying ``name``."""
    return Token(TokenType.INSTRUCTION_NAME, name)


def arg(name: str) -> Token:
    """Return a ``TokenType.ARGUMENT`` token carrying ``name``."""
    return Token(TokenType.ARGUMENT, name)


def op(name: str) -> Token:
    """Return a ``TokenType.PSEUDO_OP`` token carrying ``name``."""
    return Token(TokenType.PSEUDO_OP, name)


def lbl(name: str) -> Token:
    """Return a ``TokenType.LABEL`` token carrying ``name``."""
    return Token(TokenType.LABEL, name)


class TestTokenizer(TestCase):
    """Checks ``tokenize`` and ``split_whitespace_respecting_quotes``."""

    def test_instructions(self):
        """Each instruction line yields its name, its arguments and a NEWLINE."""
        program = [
            'li a0, 144',
            'divi a0, a0, 12',
            'xori a1, a0, 12',
        ]
        expected = [
            ins('li'), arg('a0'), COMMA, arg('144'), NEWLINE,
            ins('divi'), arg('a0'), COMMA, arg('a0'), COMMA, arg('12'), NEWLINE,
            ins('xori'), arg('a1'), COMMA, arg('a0'), COMMA, arg('12'), NEWLINE,
        ]

        self.assertEqual(list(tokenize(program)), expected)

    def test_comments(self):
        """A line starting with ``#``, ``//`` or ``;`` contributes no tokens."""
        expected = [ins('li'), arg('a0'), COMMA, arg('144'), NEWLINE]

        for prefix in ('#', '//', ';'):
            # subTest reports every failing prefix by name instead of
            # aborting the whole test on the first mismatch.
            with self.subTest(prefix=prefix):
                lines = [
                    prefix + ' this is a comment',
                    'li a0, 144',
                ]
                self.assertEqual(list(tokenize(lines)), expected)

    def test_pseudo_ins(self):
        """Dot-prefixed directives are expected as PSEUDO_OP plus arguments."""
        input_program = [
            '.section .text',
            '.type init, @function',
        ]
        expected = [
            op('.section'), arg('.text'), NEWLINE,
            op('.type'), arg('init'), COMMA, arg('@function'), NEWLINE,
        ]

        self.assertEqual(list(tokenize(input_program)), expected)

    def test_full_program(self):
        """Tokenize a small program mixing comments, directives and a label.

        The expected stream below encodes three things: comment-only and
        blank lines produce no tokens, ``0(ra)`` is expected as ``arg('ra')``
        followed by ``arg('0')``, and ``section:`` is expected as a label
        even though ``.section`` appears earlier as a pseudo-op.
        """
        # NOTE(review): the line breaks and indentation inside this literal
        # were reconstructed from a whitespace-collapsed copy of the file;
        # confirm the exact layout against version control.
        program = """
# a hashtag comment
; semicolon comment followed by an empty line

    .section .text
// double slash comment
    addi sp, sp, -32
    sw s0, 0(ra)
section:
    sub s0, s0, s0
"""
        expected = [
            op('.section'), arg('.text'), NEWLINE,
            ins('addi'), arg('sp'), COMMA, arg('sp'), COMMA, arg('-32'), NEWLINE,
            ins('sw'), arg('s0'), COMMA, arg('ra'), arg('0'), NEWLINE,
            lbl('section:'), NEWLINE,
            ins('sub'), arg('s0'), COMMA, arg('s0'), COMMA, arg('s0'), NEWLINE,
        ]

        self.assertEqual(list(tokenize(program.splitlines())), expected)

    def _assert_split(self, text, expected):
        """Assert that splitting ``text`` yields exactly the ``expected`` parts."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes(text)),
            expected,
        )

    def test_split_whitespace_respecting_quotes_single(self):
        """A single word comes back as a single part."""
        self._assert_split("test", ["test"])

    def test_split_whitespace_respecting_quotes_empty(self):
        """The empty string yields no parts."""
        self._assert_split("", [])

    def test_split_whitespace_respecting_quotes_two_parts(self):
        """Two space-separated words yield two parts."""
        self._assert_split("test 123", ["test", "123"])

    def test_split_whitespace_respecting_quotes_whole_quoted(self):
        """Single-quoted text stays one part, without the quotes."""
        self._assert_split("'test 123'", ["test 123"])

    def test_split_whitespace_respecting_quotes_double_quotes(self):
        """Double-quoted text stays one part, without the quotes."""
        self._assert_split('"test 123"', ["test 123"])

    def test_split_whitespace_respecting_quotes_quoted_then_normal(self):
        """A quoted part followed by a bare word yields two parts."""
        self._assert_split('"test 123" abc', ["test 123", "abc"])

    def test_split_whitespace_respecting_quotes_quoted_sorrounded(self):
        """A quoted part between two bare words yields three parts."""
        self._assert_split('hello "test 123" abc', ["hello", "test 123", "abc"])

    def test_split_whitespace_respecting_quotes_weird_spaces(self):
        """A tab separates parts just like a space does."""
        self._assert_split('hello "test 123"\tabc', ["hello", "test 123", "abc"])

    def test_split_whitespace_respecting_quotes_quotes_no_spaces(self):
        """Quotes delimit parts even when no whitespace surrounds them."""
        self._assert_split('hello"test 123"abc', ["hello", "test 123", "abc"])