#pylint: disable=too-many-public-methods from functools import reduce import os, re, shutil, tempfile import unittest from gleetex import htmlhandling excl_filename = htmlhandling.HtmlImageFormatter.EXCLUSION_FILE_NAME HTML_SKELETON = ''' {1}''' def read(file_name, mode='r', encoding='utf-8'): """Read the file, return the string. Close file properly.""" with open(file_name, mode, encoding=encoding) as handle: return handle.read() class HtmlparserTest(unittest.TestCase): def setUp(self): self.p = htmlhandling.EqnParser() def test_start_tags_are_parsed_literally(self): self.p.feed("

") self.assertEqual(self.p.get_data()[0], "

", "The HTML parser should copy start tags literally.") def test_that_end_tags_are_copied_literally(self): self.p.feed("

") self.assertEqual(''.join(self.p.get_data()), "

") def test_entities_are_unchanged(self): self.p.feed(" ") self.assertEqual(self.p.get_data()[0], ' ') def test_charsets_are_copied(self): self.p.feed('>→') self.assertEqual(''.join(self.p.get_data()[0]), '>→') def test_without_eqn_all_blocks_are_strings(self): self.p.feed("\n

blah

") self.assertTrue(reduce(lambda x,y: x and isinstance(y, str), self.p.get_data()), "all chunks have to be strings") def test_equation_is_detected(self): self.p.feed('foo \\pi') self.assertTrue(isinstance(self.p.get_data()[0], (tuple, list))) self.assertEqual(self.p.get_data()[0][2], 'foo \\pi') def test_tag_followed_by_eqn_is_correctly_recognized(self): self.p.feed('

bar') self.assertEqual(self.p.get_data()[0], '

') self.assertTrue(isinstance(self.p.get_data(), list), "second item of data must be equation data list") def test_document_with_tag_then_eqn_then_tag_works(self): self.p.feed('

bar

baz') eqn = None # test should not depend on a specific position of equation, search for # it data = self.p.get_data() for chunk in data: if isinstance(chunk, (tuple, list)): eqn = chunk break self.assertTrue(isinstance(data[0], str)) self.assertTrue(eqn is not None, "No equation found, must be tuple/list object.") self.assertTrue(isinstance(data[-1], str)) def test_equation_is_copied_literally(self): self.p.feed('my\nlittle\n\\tau') self.assertEqual(self.p.get_data()[0][2], 'my\nlittle\n\\tau') def test_unclosed_eqns_are_detected(self): self.assertRaises(htmlhandling.ParseException, self.p.feed, '

\\endless\\formula') def test_nested_formulas_trigger_exception(self): self.assertRaises(htmlhandling.ParseException, self.p.feed, "\\pi") self.assertRaises(htmlhandling.ParseException, self.p.feed, "\\pi

") def test_formulas_without_displaymath_attribute_are_detected(self): self.p.feed('

\frac12
bar

') formulas = [c for c in self.p.get_data() if isinstance(c, (tuple, list))] self.assertEqual(len(formulas), 2) # there should be _2_ formulas self.assertEqual(formulas[0][1], False) # no displaymath self.assertEqual(formulas[1][1], False) # no displaymath def test_that_unclosed_formulas_detected(self): self.assertRaises(htmlhandling.ParseException, self.p.feed, "\\pi

") self.assertRaises(htmlhandling.ParseException, self.p.feed, "\\pi") def test_formula_contains_only_formula(self): p = htmlhandling.EqnParser() p.feed("

") formula = next(e for e in p.get_data() if isinstance(e, (list, tuple))) self.assertEqual(formula[-1], "1test

') formula = next(e for e in p.get_data() if isinstance(e, (list, tuple))) self.assertEqual(formula[-1], "test") p = htmlhandling.EqnParser() p.feed("

") formula = next(e for e in p.get_data() if isinstance(e, (list, tuple))) self.assertEqual(formula[-1], "1a>b') formula = self.p.get_data()[0] self.assertEqual(formula[-1], "a>b") def test_displaymath_is_recognized(self): self.p.feed('\\sum\limits_{n=1}^{e^i} a^nl^n') self.assertEqual(self.p.get_data()[0][1], True) # displaymath flag set def test_encoding_is_parsed_from_HTML4(self): iso8859_1 = HTML_SKELETON.format('iso-8859-15', 'öäüß').encode('iso-8859-1') self.p.feed(iso8859_1) self.assertEqual(self.p._EqnParser__encoding, 'iso-8859-15') def test_encoding_is_parsed_from_HTML5(self): document = r"""

""" self.p.feed(document.encode('utf-8')) self.assertEqual(self.p._EqnParser__encoding.lower(), 'utf-8') def test_strings_can_be_passed_tO_parser_as_well(self): # no exception - everything is working as expected self.p.feed(HTML_SKELETON.format('utf-8', 'æø')) class GetPositionTest(unittest.TestCase): def test_that_line_number_is_correct(self): self.assertEqual(htmlhandling.get_position('jojo', 0)[0], 0) self.assertEqual(htmlhandling.get_position('jojo', 3)[0], 0) self.assertEqual(htmlhandling.get_position('a\njojo', 3)[0], 1) self.assertEqual(htmlhandling.get_position('a\n\njojo', 3)[0], 2) def test_that_position_on_line_is_correct(self): self.assertEqual(htmlhandling.get_position('jojo', 0)[1], 0) self.assertEqual(htmlhandling.get_position('jojo', 3)[1], 3) self.assertEqual(htmlhandling.get_position('a\njojo', 3)[1], 2) self.assertEqual(htmlhandling.get_position('a\n\njojo', 3)[1], 1) class HtmlImageTest(unittest.TestCase): def setUp(self): self.pos = {'depth':99, 'height' : 88, 'width' : 77} self.original_directory = os.getcwd() self.tmpdir = tempfile.mkdtemp() os.chdir(self.tmpdir) def tearDown(self): os.chdir(self.original_directory) shutil.rmtree(self.tmpdir, ignore_errors=True) def test_that_no_file_is_written_if_no_content(self): with htmlhandling.HtmlImageFormatter('foo.html'): pass self.assertFalse(os.path.exists('foo.html') ) def test_file_if_written_when_content_exists(self): with htmlhandling.HtmlImageFormatter() as img: img.format_excluded(self.pos, '\\tau\\tau', 'foo.png') self.assertTrue(os.path.exists(excl_filename) ) def test_written_file_starts_and_ends_more_or_less_properly(self): with htmlhandling.HtmlImageFormatter('.') as img: img.format_excluded(self.pos, '\\tau\\tau', 'foo.png') data = read(htmlhandling.HtmlImageFormatter.EXCLUSION_FILE_NAME, 'r', encoding='utf-8') self.assertTrue('' in data) self.assertTrue('' in data) # make sure encoding is specified self.assertTrue('

%s

\n' % (\ ('

' if hr else ''), htmlhandling.gen_id(formula), formula) class OutsourcingParserTest(unittest.TestCase): def setUp(self): self.html = ('\n\n\n' + '' + 'Outsourced Formulas\n\n

heading

') def get_html(self, string): """Create html string with head / tail und put the specified string into it.""" return self.html + string + '\n\n' def test_formulas_are_recognized(self): data = self.get_html(htmleqn('\\tau')) parser = htmlhandling.OutsourcedFormulaParser() parser.feed(data) self.assertEqual(len(parser.get_formulas()), 1) def test_formula_doesnt_contain_surrounding_rubbish(self): data = self.get_html(htmleqn('\\gamma')) parser = htmlhandling.OutsourcedFormulaParser() parser.feed(data) self.assertEqual(len(parser.get_formulas()), 1) key = next(iter(parser.get_formulas())) par = parser.get_formulas()[key] self.assertFalse('' in par) self.assertFalse('hr' in par) def test_that_header_is_parsed_correctly(self): p = htmlhandling.OutsourcedFormulaParser() p.feed(self.get_html(htmleqn('test123', False))) head = p.get_head() self.assertTrue('DOCTYPE' in head) self.assertTrue('