diff options
-rw-r--r-- | application/bookmark/LinkUtils.php | 8 | ||||
-rw-r--r-- | tests/bookmark/LinkUtilsTest.php | 30 |
2 files changed, 34 insertions, 4 deletions
diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php index 17c37979..a74fda57 100644 --- a/application/bookmark/LinkUtils.php +++ b/application/bookmark/LinkUtils.php | |||
@@ -68,16 +68,16 @@ function html_extract_tag($tag, $html) | |||
68 | $properties = implode('|', $propertiesKey); | 68 | $properties = implode('|', $propertiesKey); |
69 | // We need an OR here to accept either 'property=og:noquote' or 'property="og:unrelated og:my-tag"' | 69 | // We need an OR here to accept either 'property=og:noquote' or 'property="og:unrelated og:my-tag"' |
70 | $orCondition = '["\']?(?:og:)?'. $tag .'["\']?|["\'][^\'"]*?(?:og:)?' . $tag . '[^\'"]*?[\'"]'; | 70 | $orCondition = '["\']?(?:og:)?'. $tag .'["\']?|["\'][^\'"]*?(?:og:)?' . $tag . '[^\'"]*?[\'"]'; |
71 | // Try to retrieve OpenGraph image. | 71 | // Try to retrieve OpenGraph tag. |
72 | $ogRegex = '#<meta[^>]+(?:'. $properties .')=(?:'. $orCondition .')[^>]*content=["\'](.*?)["\'].*?>#'; | 72 | $ogRegex = '#<meta[^>]+(?:'. $properties .')=(?:'. $orCondition .')[^>]*content=(["\'])([^\1]*?)\1.*?>#'; |
73 | // If the attributes are not in the order property => content (e.g. Github) | 73 | // If the attributes are not in the order property => content (e.g. Github) |
74 | // New regex to keep this readable... more or less. | 74 | // New regex to keep this readable... more or less. |
75 | $ogRegexReverse = '#<meta[^>]+content=["\'](.*?)["\'][^>]+(?:'. $properties .')=(?:'. $orCondition .').*?>#'; | 75 | $ogRegexReverse = '#<meta[^>]+content=(["\'])([^\1]*?)\1[^>]+(?:'. $properties .')=(?:'. $orCondition .').*?>#'; |
76 | 76 | ||
77 | if (preg_match($ogRegex, $html, $matches) > 0 | 77 | if (preg_match($ogRegex, $html, $matches) > 0 |
78 | || preg_match($ogRegexReverse, $html, $matches) > 0 | 78 | || preg_match($ogRegexReverse, $html, $matches) > 0 |
79 | ) { | 79 | ) { |
80 | return $matches[1]; | 80 | return $matches[2]; |
81 | } | 81 | } |
82 | 82 | ||
83 | return false; | 83 | return false; |
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php index 3321242f..9bddf84b 100644 --- a/tests/bookmark/LinkUtilsTest.php +++ b/tests/bookmark/LinkUtilsTest.php | |||
@@ -169,6 +169,36 @@ class LinkUtilsTest extends TestCase | |||
169 | } | 169 | } |
170 | 170 | ||
171 | /** | 171 | /** |
172 | * Test html_extract_tag() with double-quoted content containing a single quote, and vice versa. | |
173 | */ | ||
174 | public function testHtmlExtractExistentNameTagWithMixedQuotes(): void | ||
175 | { | ||
176 | $description = 'Bob and Alice share M&M\'s.'; | ||
177 | |||
178 | $html = '<meta property="og:description" content="' . $description . '">'; | ||
179 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
180 | |||
181 | $html = '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '. | ||
182 | 'tag2="content2" content="' . $description . '" tag3="content3">'; | ||
183 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
184 | |||
185 | $html = '<meta property="og:description" name="description" content="' . $description . '">'; | ||
186 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
187 | |||
188 | $description = 'Bob and Alice share "cookies".'; | ||
189 | |||
190 | $html = '<meta property="og:description" content=\'' . $description . '\'>'; | ||
191 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
192 | |||
193 | $html = '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '. | ||
194 | 'tag2="content2" content=\'' . $description . '\' tag3="content3">'; | ||
195 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
196 | |||
197 | $html = '<meta property="og:description" name="description" content=\'' . $description . '\'>'; | ||
198 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
199 | } | ||
200 | |||
201 | /** | ||
172 | * Test html_extract_tag() when the tag <meta name= is not found. | 202 | * Test html_extract_tag() when the tag <meta name= is not found. |
173 | */ | 203 | */ |
174 | public function testHtmlExtractNonExistentNameTag() | 204 | public function testHtmlExtractNonExistentNameTag() |