"""Unit tests for ``riscemu.tokenizer``."""

from unittest import TestCase

from riscemu.tokenizer import (
    tokenize,
    print_tokens,
    Token,
    TokenType,
    NEWLINE,
    COMMA,
    split_whitespace_respecting_quotes,
)


def ins(name: str) -> Token:
    """Build an ``INSTRUCTION_NAME`` token holding *name*."""
    return Token(TokenType.INSTRUCTION_NAME, name)


def arg(name: str) -> Token:
    """Build an ``ARGUMENT`` token holding *name*."""
    return Token(TokenType.ARGUMENT, name)


def op(name: str) -> Token:
    """Build a ``PSEUDO_OP`` token holding *name*."""
    return Token(TokenType.PSEUDO_OP, name)


def lbl(name: str) -> Token:
    """Build a ``LABEL`` token holding *name*."""
    return Token(TokenType.LABEL, name)


class TestTokenizer(TestCase):
    """Tests for ``tokenize`` and ``split_whitespace_respecting_quotes``."""

    def test_instructions(self):
        """Each instruction line becomes name, comma-separated args, NEWLINE."""
        program = ["li a0, 144", "divi a0, a0, 12", "xori a1, a0, 12"]
        tokens = [
            ins("li"),
            arg("a0"),
            COMMA,
            arg("144"),
            NEWLINE,
            ins("divi"),
            arg("a0"),
            COMMA,
            arg("a0"),
            COMMA,
            arg("12"),
            NEWLINE,
            ins("xori"),
            arg("a1"),
            COMMA,
            arg("a0"),
            COMMA,
            arg("12"),
            NEWLINE,
        ]
        self.assertEqual(list(tokenize(program)), tokens)

    def test_comments(self):
        """A comment-only line yields no tokens for any supported prefix."""
        parsed_res = [ins("li"), arg("a0"), COMMA, arg("144"), NEWLINE]
        for c in ("#", "//", ";"):
            # subTest reports which prefix failed and keeps checking the
            # remaining prefixes instead of stopping at the first failure.
            with self.subTest(comment_prefix=c):
                lines = [c + " this is a comment", "li a0, 144"]
                self.assertEqual(list(tokenize(lines)), parsed_res)

    def test_pseudo_ins(self):
        """Lines starting with a pseudo-op yield a PSEUDO_OP and its args."""
        parsed_res = [
            op(".section"),
            arg(".text"),
            NEWLINE,
            op(".type"),
            arg("init"),
            COMMA,
            arg("@function"),
            NEWLINE,
        ]
        input_program = [".section .text", ".type init, @function"]
        self.assertEqual(list(tokenize(input_program)), parsed_res)

    def test_full_program(self):
        """Tokenize a small program mixing comments, labels and instructions.

        The expected tokens show that comment lines and empty lines produce
        nothing, and that ``0(ra)`` is emitted as ``ra`` followed by ``0``.
        """
        program = """
# a hashtag comment
; semicolon comment followed by an empty line

.section .text
// double slash comment
addi sp, sp, -32
sw s0, 0(ra)
section:
sub s0, s0, s0
"""
        tokens = [
            op(".section"),
            arg(".text"),
            NEWLINE,
            ins("addi"),
            arg("sp"),
            COMMA,
            arg("sp"),
            COMMA,
            arg("-32"),
            NEWLINE,
            ins("sw"),
            arg("s0"),
            COMMA,
            arg("ra"),
            arg("0"),
            NEWLINE,
            lbl("section:"),
            NEWLINE,
            ins("sub"),
            arg("s0"),
            COMMA,
            arg("s0"),
            COMMA,
            arg("s0"),
            NEWLINE,
        ]
        self.assertEqual(list(tokenize(program.splitlines())), tokens)

    def test_split_whitespace_respecting_quotes_single(self):
        """A single word is returned as the only part."""
        self.assertEqual(list(split_whitespace_respecting_quotes("test")), ["test"])

    def test_split_whitespace_respecting_quotes_empty(self):
        """An empty string yields no parts."""
        self.assertEqual(list(split_whitespace_respecting_quotes("")), [])

    def test_split_whitespace_respecting_quotes_two_parts(self):
        """Unquoted text is split at the space."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes("test 123")), ["test", "123"]
        )

    def test_split_whitespace_respecting_quotes_whole_quoted(self):
        """Single-quoted text stays one part, with the quotes removed."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes("'test 123'")), ["test 123"]
        )

    def test_split_whitespace_respecting_quotes_double_quotes(self):
        """Double-quoted text stays one part, with the quotes removed."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes('"test 123"')), ["test 123"]
        )

    def test_split_whitespace_respecting_quotes_quoted_then_normal(self):
        """A quoted part followed by a plain word gives two parts."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes('"test 123" abc')),
            ["test 123", "abc"],
        )

    def test_split_whitespace_respecting_quotes_quoted_sorrounded(self):
        """A quoted part between two plain words gives three parts."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes('hello "test 123" abc')),
            ["hello", "test 123", "abc"],
        )

    def test_split_whitespace_respecting_quotes_weird_spaces(self):
        """A tab separates parts just like a space does."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes('hello "test 123"\tabc')),
            ["hello", "test 123", "abc"],
        )

    def test_split_whitespace_respecting_quotes_quotes_no_spaces(self):
        """Quotes delimit parts even without surrounding whitespace."""
        self.assertEqual(
            list(split_whitespace_respecting_quotes('hello"test 123"abc')),
            ["hello", "test 123", "abc"],
        )