From 2cd0509b503332b1989f06da45d569d4d2929be5 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Thu, 3 Sep 2020 17:46:26 +0200 Subject: Improve regex to extract HTML metadata (title, description, etc.) Also added a bunch of tests to cover more use cases. Fixes #1375 --- tests/bookmark/LinkUtilsTest.php | 89 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'tests') diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php index 7d4a7b89..cc7819bc 100644 --- a/tests/bookmark/LinkUtilsTest.php +++ b/tests/bookmark/LinkUtilsTest.php @@ -81,8 +81,78 @@ class LinkUtilsTest extends TestCase public function testHtmlExtractExistentNameTag() { $description = 'Bob and Alice share cookies.'; + + // Simple one line $html = 'stuff2'; $this->assertEquals($description, html_extract_tag('description', $html)); + + // Simple OpenGraph + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // Simple reversed OpenGraph + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // ItemProp OpenGraph + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph without quotes + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph reversed without quotes + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph with noise + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph reversed with noise + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph multiple properties start + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph multiple properties end + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph multiple properties both end + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph multiple properties both end with noise + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph reversed multiple properties start + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph reversed multiple properties end + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph reversed multiple properties both end + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // OpenGraph reversed multiple properties both end with noise + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); + + // Suggestion from #1375 + $html = ''; + $this->assertEquals($description, html_extract_tag('description', $html)); } /** @@ -92,6 +162,25 @@ class LinkUtilsTest extends TestCase { $html = 'stuff2'; $this->assertFalse(html_extract_tag('description', $html)); + + // Partial meta tag + $html = ''; + $this->assertFalse(html_extract_tag('description', $html)); + + $html = ''; + $this->assertFalse(html_extract_tag('description', $html)); + + $html = ''; + $this->assertFalse(html_extract_tag('description', $html)); + + $html = ''; + $this->assertFalse(html_extract_tag('description', $html)); + + $html = ''; + $this->assertFalse(html_extract_tag('description', $html)); + + $html = ''; + $this->assertFalse(html_extract_tag('description', $html)); } /** -- cgit v1.2.3