$properties = implode('|', $propertiesKey);
// We need an OR here to accept either 'property=og:noquote' or 'property="og:unrelated og:my-tag"'
$orCondition = '["\']?(?:og:)?'. $tag .'["\']?|["\'][^\'"]*?(?:og:)?' . $tag . '[^\'"]*?[\'"]';
- // Try to retrieve OpenGraph image.
- $ogRegex = '#<meta[^>]+(?:'. $properties .')=(?:'. $orCondition .')[^>]*content=["\'](.*?)["\'].*?>#';
+ // Try to retrieve OpenGraph tag.
+ $ogRegex = '#<meta[^>]+(?:'. $properties .')=(?:'. $orCondition .')[^>]*content=(["\'])([^\1]*?)\1.*?>#';
// If the attributes are not in the order property => content (e.g. Github)
// New regex to keep this readable... more or less.
- $ogRegexReverse = '#<meta[^>]+content=["\'](.*?)["\'][^>]+(?:'. $properties .')=(?:'. $orCondition .').*?>#';
+ $ogRegexReverse = '#<meta[^>]+content=(["\'])([^\1]*?)\1[^>]+(?:'. $properties .')=(?:'. $orCondition .').*?>#';
if (preg_match($ogRegex, $html, $matches) > 0
|| preg_match($ogRegexReverse, $html, $matches) > 0
) {
- return $matches[1];
+ return $matches[2];
}
return false;
{
return isset($linkUrl[0]) && $linkUrl[0] === '?';
}
+
+ /**
+  * Extract an array of tags from a given tag string, with provided separator.
+  *
+  * The string is split on one or more consecutive occurrences of the separator;
+  * whitespace surrounding a separator is discarded and empty entries are dropped.
+  *
+  * @param string|null $tags      String containing a list of tags separated by $separator.
+  * @param string      $separator Shaarli's default: ' ' (whitespace)
+  *
+  * @return array List of tags
+  */
+ function tags_str2array(?string $tags, string $separator): array
+ {
+     // For whitespaces, we use the special \s regex character. Any other separator is
+     // escaped so that regex metacharacters ('.', '|', '/', ...) are matched literally.
+     $separatorPattern = $separator === ' ' ? '\s' : preg_quote($separator, '/');
+
+     // Coalesce null *before* trimming: passing null to trim() is deprecated since PHP 8.1.
+     $tagList = preg_split(
+         '/\s*(?:' . $separatorPattern . ')+\s*/',
+         trim($tags ?? ''),
+         -1,
+         PREG_SPLIT_NO_EMPTY
+     );
+
+     // preg_split() returns false on failure, which would violate the array return type.
+     return $tagList === false ? [] : $tagList;
+ }
+
+ /**
+  * Build a tag string from a list of tags, joined with the provided separator.
+  * Note that the given array is cleaned up by tags_filter() before being joined.
+  *
+  * @param array|null $tags      List of tags
+  * @param string     $separator String inserted between two tags
+  *
+  * @return string
+  */
+ function tags_array2str(?array $tags, string $separator): string
+ {
+     $cleanTags = tags_filter($tags, $separator);
+
+     return implode($separator, $cleanTags);
+ }
+
+ /**
+  * Clean an array of tags: trim + remove empty entries.
+  *
+  * Each entry is stripped of trim()'s default characters and of the separator
+  * on both ends. Entries that end up as an empty string are removed and the
+  * result is re-indexed from 0.
+  *
+  * @param array|null $tags      List of tags
+  * @param string     $separator Separator, trimmed from both ends of every tag
+  *
+  * @return array
+  */
+ function tags_filter(?array $tags, string $separator): array
+ {
+     // Default character list of trim(), extended with the separator.
+     $trimChars = " \t\n\r\0\x0B" . $separator;
+
+     $trimmed = array_map(function (string $entry) use ($trimChars): string {
+         return trim($entry, $trimChars);
+     }, $tags ?? []);
+
+     // Compare against '' explicitly: array_filter() without a callback drops every
+     // falsy value, which would also remove the perfectly valid tag '0'.
+     $nonEmpty = array_filter($trimmed, function (string $entry): bool {
+         return $entry !== '';
+     });
+
+     return array_values($nonEmpty);
+ }
$this->assertEquals($description, html_extract_tag('description', $html));
}
+ /**
+  * Test html_extract_tag() when the content attribute is delimited by one kind of quote
+  * and contains the other kind: double quoted content with a single quote, and the opposite.
+  */
+ public function testHtmlExtractExistentNameTagWithMixedQuotes(): void
+ {
+     // Delimiting quote => description containing the other kind of quote.
+     $cases = [
+         '"' => 'Bob and Alice share M&M\'s.',
+         '\'' => 'Bob and Alice share "cookies".',
+     ];
+
+     foreach ($cases as $quote => $description) {
+         $content = 'content=' . $quote . $description . $quote;
+
+         $htmlVariants = [
+             // Only the property and the content.
+             '<meta property="og:description" ' . $content . '>',
+             // Property listing several values, surrounded by unrelated attributes.
+             '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" ' .
+                 'tag2="content2" ' . $content . ' tag3="content3">',
+             // Both property and name attributes.
+             '<meta property="og:description" name="description" ' . $content . '>',
+         ];
+
+         foreach ($htmlVariants as $html) {
+             $this->assertEquals($description, html_extract_tag('description', $html));
+         }
+     }
+ }
+
/**
* Test html_extract_tag() when the tag <meta name= is not found.
*/
$title,
$desc,
$keywords,
- false
+ false,
+ ' '
);
$data = [
$title,
$desc,
$keywords,
- false
+ false,
+ ' '
);
$data = [
$title,
$desc,
$keywords,
- false
+ false,
+ ' '
);
$data = [
$title,
$desc,
$keywords,
- false
+ false,
+ ' '
);
$data = [
$title,
$desc,
$keywords,
- true
+ true,
+ ' '
);
$data = [
'th=device-width">'
$this->assertFalse(is_note('https://github.com/shaarli/Shaarli/?hi'));
}
+ /**
+  * Test tags_str2array with the ' ' (whitespace) separator.
+  */
+ public function testTagsStr2ArrayWithSpaceSeparator(): void
+ {
+     $separator = ' ';
+
+     // Each case is [input tag string, expected list of tags].
+     $cases = [
+         ['tag1 tag2 tag3', ['tag1', 'tag2', 'tag3']],
+         ['tag1 tag2 tag3', ['tag1', 'tag2', 'tag3']],
+         [' tag1 tag2 tag3 ', ['tag1', 'tag2', 'tag3']],
+         [' tag1@ tag2, .tag3 ', ['tag1@', 'tag2,', '.tag3']],
+         ['', []],
+         [' ', []],
+         [null, []],
+     ];
+
+     foreach ($cases as [$input, $expected]) {
+         static::assertSame($expected, tags_str2array($input, $separator));
+     }
+ }
+
+ /**
+  * Test tags_str2array with the '@' character as separator.
+  */
+ public function testTagsStr2ArrayWithCharSeparator(): void
+ {
+     $separator = '@';
+
+     // Each case is [input tag string, expected list of tags].
+     $cases = [
+         ['tag1@tag2@tag3', ['tag1', 'tag2', 'tag3']],
+         ['tag1@@@@tag2@@@@tag3', ['tag1', 'tag2', 'tag3']],
+         ['@@@tag1@@@tag2@@@@tag3@@', ['tag1', 'tag2', 'tag3']],
+         ['@@@ tag1# @@@ tag2, and other @@@@.tag3@@', ['tag1#', 'tag2, and other', '.tag3']],
+         ['', []],
+         [' ', []],
+         [null, []],
+     ];
+
+     foreach ($cases as [$input, $expected]) {
+         static::assertSame($expected, tags_str2array($input, $separator));
+     }
+ }
+
+ /**
+  * Test tags_array2str with the ' ' (whitespace) separator.
+  */
+ public function testTagsArray2StrWithSpaceSeparator(): void
+ {
+     $separator = ' ';
+
+     // Each case is [input list of tags, expected tag string].
+     $cases = [
+         [['tag1', 'tag2', 'tag3'], 'tag1 tag2 tag3'],
+         [['tag1,', 'tag2@', 'tag3'], 'tag1, tag2@ tag3'],
+         [[' tag1 ', 'tag2', 'tag3 '], 'tag1 tag2 tag3'],
+         [[' tag1 ', ' ', 'tag2', ' ', 'tag3 '], 'tag1 tag2 tag3'],
+         [[' tag1 '], 'tag1'],
+         [[' '], ''],
+         [[], ''],
+         [null, ''],
+     ];
+
+     foreach ($cases as [$input, $expected]) {
+         static::assertSame($expected, tags_array2str($input, $separator));
+     }
+ }
+
+ /**
+  * Test tags_array2str with the '@' character as separator.
+  */
+ public function testTagsArray2StrWithCharSeparator(): void
+ {
+     $separator = '@';
+
+     // Each case is [input list of tags, expected tag string].
+     $cases = [
+         [['tag1', 'tag2', 'tag3'], 'tag1@tag2@tag3'],
+         [['tag1,', 'tag2@', 'tag3'], 'tag1,@tag2@tag3'],
+         [['@@@@ tag1@@@', ' @tag2, and other @', 'tag3@@@@'], 'tag1@tag2, and other@tag3'],
+         [['@@@tag1@@@', '@', 'tag2', '@@@', 'tag3@@@'], 'tag1@tag2@tag3'],
+         [['@@@@tag1@@@@'], 'tag1'],
+         [['@@@'], ''],
+         [[], ''],
+         [null, ''],
+     ];
+
+     foreach ($cases as [$input, $expected]) {
+         static::assertSame($expected, tags_array2str($input, $separator));
+     }
+ }
+
+ /**
+  * Test tags_filter with the ' ' (whitespace) separator.
+  */
+ public function testTagsFilterWithSpaceSeparator(): void
+ {
+     $separator = ' ';
+
+     // Each case is [input list of tags, expected cleaned list].
+     $cases = [
+         [['tag1', 'tag2', 'tag3'], ['tag1', 'tag2', 'tag3']],
+         [['tag1,', 'tag2@', 'tag3'], ['tag1,', 'tag2@', 'tag3']],
+         [[' tag1 ', 'tag2', 'tag3 '], ['tag1', 'tag2', 'tag3']],
+         [[' tag1 ', ' ', 'tag2', ' ', 'tag3 '], ['tag1', 'tag2', 'tag3']],
+         [[' tag1 '], ['tag1']],
+         [[' '], []],
+         [[], []],
+         [null, []],
+     ];
+
+     foreach ($cases as [$input, $expected]) {
+         static::assertSame($expected, tags_filter($input, $separator));
+     }
+ }
+
+ /**
+  * Test tags_filter with the '@' character as separator
+  * (despite the method name, the separator used here is '@', not a space).
+  */
+ public function testTagsArrayFilterWithSpaceSeparator(): void
+ {
+     $separator = '@';
+
+     // Each case is [input list of tags, expected cleaned list].
+     $cases = [
+         [['tag1', 'tag2', 'tag3'], ['tag1', 'tag2', 'tag3']],
+         [['tag1,', 'tag2#', 'tag3'], ['tag1,', 'tag2#', 'tag3']],
+         [['@@@@ tag1@@@', ' @tag2, and other @', 'tag3@@@@'], ['tag1', 'tag2, and other', 'tag3']],
+         [['@@@tag1@@@', '@', 'tag2', '@@@', 'tag3@@@'], ['tag1', 'tag2', 'tag3']],
+         [['@@@@tag1@@@@'], ['tag1']],
+         [['@@@'], []],
+         [[], []],
+         [null, []],
+     ];
+
+     foreach ($cases as [$input, $expected]) {
+         static::assertSame($expected, tags_filter($input, $separator));
+     }
+ }
+
/**
* Util function to build an hashtag link.
*