$properties = implode('|', $propertiesKey);
// We need a OR here to accept either 'property=og:noquote' or 'property="og:unrelated og:my-tag"'
$orCondition = '["\']?(?:og:)?' . $tag . '["\']?|["\'][^\'"]*?(?:og:)?' . $tag . '[^\'"]*?[\'"]';
+ // Allow the content value to contain the other kind of quote (an apostrophe inside double quotes, or the reverse)
+ $content = 'content=(["\'])((?:(?!\1).)*)\1';
// Try to retrieve OpenGraph tag.
- $ogRegex = '#<meta[^>]+(?:' . $properties . ')=(?:' . $orCondition . ')[^>]*content=(["\'])([^\1]*?)\1.*?>#';
+ $ogRegex = '#<meta[^>]+(?:' . $properties . ')=(?:' . $orCondition . ')[^>]*' . $content . '.*?>#';
// If the attributes are not in the order property => content (e.g. Github)
// New regex to keep this readable... more or less.
- $ogRegexReverse = '#<meta[^>]+content=(["\'])([^\1]*?)\1[^>]+(?:' . $properties . ')=(?:' . $orCondition . ').*?>#';
+ $ogRegexReverse = '#<meta[^>]+' . $content . '[^>]+(?:' . $properties . ')=(?:' . $orCondition . ').*?>#';
if (
preg_match($ogRegex, $html, $matches) > 0
$this->assertFalse(html_extract_tag('description', $html));
}
+    /**
+     * Test html_extract_tag() on a description whose double-quoted content holds an apostrophe.
+     *
+     * The markup is a truncated fragment (judging by the test name, captured from a Google page)
+     * in which every meta tag declares `content` before `property`.
+     */
+    public function testHtmlExtractDescriptionFromGoogleRealCase(): void
+    {
+        $expected = 'Bonnes fêtes de fin d\'année ! #GoogleDoodle';
+        // The fragment is split into pieces only to keep the lines short.
+        $html = implode('', [
+            'id="gsr"><meta content="Fêtes de fin d\'année" property="twitter:title"><meta ',
+            'content="Bonnes fêtes de fin d\'année ! #GoogleDoodle" property="twitter:description">',
+            '<meta content="Bonnes fêtes de fin d\'année ! #GoogleDoodle" property="og:description">',
+            '<meta content="summary_large_image" property="twitter:card"><meta co',
+        ]);
+
+        $this->assertSame($expected, html_extract_tag('description', $html));
+    }
+
/**
* Test the header callback with valid value
*/