$properties = implode('|', $propertiesKey);
// We need an OR here to accept either 'property=og:noquote' or 'property="og:unrelated og:my-tag"'
$orCondition = '["\']?(?:og:)?'. $tag .'["\']?|["\'][^\'"]*?(?:og:)?' . $tag . '[^\'"]*?[\'"]';
- // Try to retrieve OpenGraph image.
- $ogRegex = '#<meta[^>]+(?:'. $properties .')=(?:'. $orCondition .')[^>]*content=["\'](.*?)["\'].*?>#';
+ // Try to retrieve OpenGraph tag.
+ $ogRegex = '#<meta[^>]+(?:'. $properties .')=(?:'. $orCondition .')[^>]*content=(["\'])([^\1]*?)\1.*?>#';
// If the attributes are not in the order property => content (e.g. Github)
// New regex to keep this readable... more or less.
- $ogRegexReverse = '#<meta[^>]+content=["\'](.*?)["\'][^>]+(?:'. $properties .')=(?:'. $orCondition .').*?>#';
+ $ogRegexReverse = '#<meta[^>]+content=(["\'])([^\1]*?)\1[^>]+(?:'. $properties .')=(?:'. $orCondition .').*?>#';
if (preg_match($ogRegex, $html, $matches) > 0
|| preg_match($ogRegexReverse, $html, $matches) > 0
) {
- return $matches[1];
+ return $matches[2];
}
return false;
$this->assertEquals($description, html_extract_tag('description', $html));
}
+ /**
+  * html_extract_tag() must return the full content value when that value contains
+  * the quote character that is NOT used as the attribute delimiter:
+  * a single quote inside a double-quoted attribute, and a double quote inside
+  * a single-quoted attribute.
+  */
+ public function testHtmlExtractExistentNameTagWithMixedQuotes(): void
+ {
+     // Double-quoted content attribute holding an apostrophe.
+     $expected = 'Bob and Alice share M&M\'s.';
+     $doubleQuotedDocuments = [
+         // Plain property followed by content.
+         '<meta property="og:description" content="' . $expected . '">',
+         // Target tag surrounded by unrelated values and extra attributes.
+         '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '.
+         'tag2="content2" content="' . $expected . '" tag3="content3">',
+         // Both property and name attributes present.
+         '<meta property="og:description" name="description" content="' . $expected . '">',
+     ];
+     foreach ($doubleQuotedDocuments as $markup) {
+         $this->assertEquals($expected, html_extract_tag('description', $markup));
+     }
+
+     // Single-quoted content attribute holding double quotes.
+     $expected = 'Bob and Alice share "cookies".';
+     $singleQuotedDocuments = [
+         // Plain property followed by content.
+         '<meta property="og:description" content=\'' . $expected . '\'>',
+         // Target tag surrounded by unrelated values and extra attributes.
+         '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '.
+         'tag2="content2" content=\'' . $expected . '\' tag3="content3">',
+         // Both property and name attributes present.
+         '<meta property="og:description" name="description" content=\'' . $expected . '\'>',
+     ];
+     foreach ($singleQuotedDocuments as $markup) {
+         $this->assertEquals($expected, html_extract_tag('description', $markup));
+     }
+ }
+
/**
* Test html_extract_tag() when the tag <meta name= is not found.
*/