set_html($in);
        $res = $ht->get_text();

        $this->assertSame($out, $res, $title);
    }

    /**
     * Data sets for HTML to text conversion: every entry carries a 'title',
     * an 'in' input string and the expected 'out' result.
     *
     * NOTE(review): several entries name tags or entities in their title
     * (H1, STRONG, B, "HTML entities") that do not occur in the 'in' string,
     * so the markup looks stripped from these fixtures - confirm against the
     * upstream copy of this file before relying on them.
     */
    public static function provide_html2text_cases(): iterable
    {
        return [
            0 => [
                'title' => 'Test entry',
                'in' => '',
                'out' => '',
            ],
            1 => [
                'title' => 'Basic HTML entities',
                'in' => '"&',
                'out' => '"&',
            ],
            2 => [
                'title' => 'HTML entity string',
                'in' => '"',
                'out' => '"',
            ],
            3 => [
                'title' => 'HTML entity in H1 tag',
                'in' => '

ś

', // ś
                'out' => "Ś\n\n", // upper ś
            ],
            4 => [
                'title' => 'H1 tag to upper-case conversion',
                'in' => '

ś

',
                'out' => "Ś\n\n",
            ],
            5 => [
                'title' => 'H1 inside B tag',
                'in' => '

ś

',
                'out' => "Ś\n\n",
            ],
            6 => [
                'title' => 'Don\'t remove non-printable chars',
                'in' => chr(0x002) . chr(0x003),
                'out' => chr(0x002) . chr(0x003),
            ],
            7 => [
                'title' => 'Remove spaces after
',
                'in' => 'test
test',
                'out' => "test\ntest",
            ],
            8 => [
                'title' => '  handling test',
                'in' => '
eye:   test
test:   test
',
                'out' => "eye: test\ntest: test",
            ],
            9 => [
                'title' => 'HTML entity in STRONG tag',
                'in' => 'ś', // ś
                'out' => 'ś',
            ],
            10 => [
                'title' => 'STRONG tag to upper-case conversion',
                'in' => 'ś',
                'out' => 'ś',
            ],
            11 => [
                'title' => 'STRONG inside B tag',
                'in' => 'ś',
                'out' => 'ś',
            ],
            12 => [
                'title' => 'Full HTML handling (html tag only)',
                'in' => "\n

test

",
                'out' => 'test',
            ],
            13 => [
                'title' => 'Full HTML handling (html+head tags)',
                'in' => '' . "\n

test

\n",
                'out' => 'test',
            ],
            14 => [
                'title' => 'Full HTML handling (html+head+body tags)',
                'in' => '' . "\n" . '' . "\n" . '

test

' . '',
                'out' => 'test',
            ],
        ];
    }

    /**
     * Test blockquote tags handling
     */
    public function test_multiple_blockquotes()
    {
        // NOTE(review): no blockquote markup is present in this input although
        // the assertions expect '>' quoting - the tags look stripped; confirm.
        $html = <<<'EOF'
Begin
OUTER BEGIN
INNER 1

Par 1
INNER 2

Par 2

Par 3

INNER 3
OUTER END
EOF;
        $ht = new \rcube_html2text($html, false, \rcube_html2text::LINKS_NONE);
        $res = $ht->get_text();

        $this->assertStringContainsString('>> INNER 1', $res, 'Quote inner');
        $this->assertStringContainsString('>> INNER 3', $res, 'Quote inner');
        $this->assertStringContainsString('> OUTER END', $res, 'Quote outer');
    }

    /**
     * Test that input with unbalanced quoting tags is converted without quoting
     */
    public function test_broken_blockquotes()
    {
        // no end tag
        // The nowdoc opener and closing marker each need their own line,
        // otherwise the file does not parse.
        $html = <<<'EOF'
Begin
QUOTED TEXT
NO END TAG FOUND
EOF;
        $ht = new \rcube_html2text($html, false, \rcube_html2text::LINKS_NONE);
        $res = $ht->get_text();

        $this->assertStringContainsString('QUOTED TEXT NO END TAG FOUND', $res, 'No quoting on invalid html');

        // with some (nested) end tags
        $html = <<<'EOF'
Begin
QUOTED TEXT
INNER 1
INNER 2
NO END TAG FOUND
EOF;
        $ht = new \rcube_html2text($html, false, \rcube_html2text::LINKS_NONE);
        $res = $ht->get_text();

        $this->assertStringContainsString('QUOTED TEXT INNER 1 INNER 2 NO END', $res, 'No quoting on invalid html');
    }

    /**
     * Test links handling
     */
    public function test_links()
    {
        // NOTE(review): the inputs below contain no anchor markup although a
        // link list is expected - the tags look stripped; confirm.
        $html = 'content';
        $expected = 'content [1] Links: ------ [1] http://test.com ';

        $ht = new \rcube_html2text($html, false, \rcube_html2text::LINKS_END);
        $res = $ht->get_text();

        $this->assertSame($expected, $res, 'Links list');

        // href == content (#1490434)
        $html = 'http://test.com';
        $expected = 'http://test.com';

        $ht = new \rcube_html2text($html, false, \rcube_html2text::LINKS_END);
        $res = $ht->get_text();

        $this->assertSame($expected, $res, 'Skip link with href == content');

        // HTML entities in links
        $html = 'test3&test4';
        $expected = 'test3&test4 [1] Links: ------ [1] http://test.com?test1&test2 ';

        $ht = new \rcube_html2text($html, false, \rcube_html2text::LINKS_END);
        $res = $ht->get_text();

        $this->assertSame($expected, $res, 'Links with HTML entities');
    }

    /**
     * Test links handling with backward compatibility boolean flag
     */
    public function test_links_bc_with_boolean()
    {
        $html = 'content';
        $expected = 'content [1] Links: ------ [1] http://test.com ';

        // Boolean true is passed where test_links() passes LINKS_END
        $ht = new \rcube_html2text($html, false, true);
        $res = $ht->get_text();

        $this->assertSame($expected, $res, 'Links list');

        // href == content (#1490434)
        $html = 'http://test.com';
        $expected = 'http://test.com';

        $ht = new \rcube_html2text($html, false, true);
        $res = $ht->get_text();

        $this->assertSame($expected, $res, 'Skip link with href == content');
    }

    /**
     * Test links inline handling
     */
    public function test_links_inline()
    {
        $html = 'content';
        $expected = 'content ';

        $ht = new \rcube_html2text($html, false, \rcube_html2text::LINKS_INLINE);
        $res = $ht->get_text();

        $this->assertSame($expected, $res, 'Links Inline');

        // href == content (#1490434)
        $html = 'http://test.com';
        $expected = 'http://test.com';

        $ht = new \rcube_html2text($html, false, \rcube_html2text::LINKS_INLINE);
        $res = $ht->get_text();

        $this->assertSame($expected, $res, 'Skip link with href == content');
    }

    /**
     * Test links handling when not using link list (#5795)
     *
     * @dataProvider provide_links_no_list_cases
     */
    #[DataProvider('provide_links_no_list_cases')]
    public function test_links_no_list($input, $output)
    {
        $h2t = new \rcube_html2text($input, false, \rcube_html2text::LINKS_NONE);
        $res = $h2t->get_text();

        $this->assertSame($output, $res, 'Links handling');
    }

    /**
     * Test links handling when not using link list (#5795) with backward compatibility boolean flag
     *
     * @dataProvider provide_links_no_list_cases
     */
    #[DataProvider('provide_links_no_list_cases')]
    public function test_links_no_list_bc_with_boolean($input, $output)
    {
        // Boolean false is passed where test_links_no_list() passes LINKS_NONE
        $h2t = new \rcube_html2text($input, false, false);
        $res = $h2t->get_text();

        $this->assertSame($output, $res, 'Links handling');
    }

    /**
     * Input/expected-output pairs shared by both test_links_no_list* tests.
     *
     * NOTE(review): the inputs carry no anchor or image markup, so the tags
     * look stripped from these fixtures - confirm against the upstream file.
     */
    public static function provide_links_no_list_cases(): iterable
    {
        return [
            [
                'this is content',
                'this is content',
            ],
            [
                'this is content& test',
                'this is content& test',
            ],
            [
                'this is content',
                'this is content',
            ],
            [
                'this is image',
                'this is http://test.com',
            ],
        ];
    }

    /**
     * Test links fallback to default handling
     */
    public function test_links_fallback_to_default_link_list()
    {
        $html = 'content';
        $expected = 'content [1] Links: ------ [1] http://test.com ';

        $ht = new \rcube_html2text($html, false);
        $res = $ht->get_text();

        $this->assertSame($expected, $res, 'Links list as default (doLinks not set)');

        $ht = new \rcube_html2text($html, false, mt_rand(3, 9999));
        $res = $ht->get_text();

        $this->assertSame($expected, $res, 'Links list as default (doLinks greater than 3)');

        $ht = new \rcube_html2text($html, false, mt_rand(-9999, -1));
        $res = $ht->get_text();

        $this->assertSame($expected, $res, 'Links list as default (doLinks lower than 0)');
    }

    /**
     * Test huge HTML content (#8137)
     */
    public function test_memory_fix_8137()
    {
        // create >1MB input
        // NOTE(review): $src is never used below - the markup that embedded it
        // in $input looks stripped; confirm against the upstream file.
        $src = 'data:image/png;base64,' . str_repeat('1234567890abcdefghijklmnopqrstuvwxyz', 50000);
        $input = 'test

test1

test2

test3

';

        $h2t = new \rcube_html2text($input, false, \rcube_html2text::LINKS_NONE);
        $res = $h2t->get_text();

        $this->assertSame("test1\n\ntest2\n\ntest3", $res, 'Huge input');
    }
}