diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-11-08 13:54:39 +0100 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2020-11-08 13:54:39 +0100 |
commit | 00d3dd91ef42df13eeafbcc54dcebe3238e322c6 (patch) | |
tree | 123066f497546ad181c96ef2bdd1fde011457807 /application/bookmark | |
parent | 8c5f6c786d00310b2e863aa316927effb7bfeedb (diff) | |
download | Shaarli-00d3dd91ef42df13eeafbcc54dcebe3238e322c6.tar.gz Shaarli-00d3dd91ef42df13eeafbcc54dcebe3238e322c6.tar.zst Shaarli-00d3dd91ef42df13eeafbcc54dcebe3238e322c6.zip |
Fix an issue truncating extracted metadata content
The previous regex forced the capture to stop at the first single or double quote found, regardless of which quote character opened the value. By using the '\1' backreference, the capture now stops only at the quote that matches the opening one.
Diffstat (limited to 'application/bookmark')
-rw-r--r-- | application/bookmark/LinkUtils.php | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php index 17c37979..a74fda57 100644 --- a/application/bookmark/LinkUtils.php +++ b/application/bookmark/LinkUtils.php | |||
@@ -68,16 +68,16 @@ function html_extract_tag($tag, $html) | |||
68 | $properties = implode('|', $propertiesKey); | 68 | $properties = implode('|', $propertiesKey); |
69 | // We need a OR here to accept either 'property=og:noquote' or 'property="og:unrelated og:my-tag"' | 69 | // We need a OR here to accept either 'property=og:noquote' or 'property="og:unrelated og:my-tag"' |
70 | $orCondition = '["\']?(?:og:)?'. $tag .'["\']?|["\'][^\'"]*?(?:og:)?' . $tag . '[^\'"]*?[\'"]'; | 70 | $orCondition = '["\']?(?:og:)?'. $tag .'["\']?|["\'][^\'"]*?(?:og:)?' . $tag . '[^\'"]*?[\'"]'; |
71 | // Try to retrieve OpenGraph image. | 71 | // Try to retrieve OpenGraph tag. |
72 | $ogRegex = '#<meta[^>]+(?:'. $properties .')=(?:'. $orCondition .')[^>]*content=["\'](.*?)["\'].*?>#'; | 72 | $ogRegex = '#<meta[^>]+(?:'. $properties .')=(?:'. $orCondition .')[^>]*content=(["\'])([^\1]*?)\1.*?>#'; |
73 | // If the attributes are not in the order property => content (e.g. Github) | 73 | // If the attributes are not in the order property => content (e.g. Github) |
74 | // New regex to keep this readable... more or less. | 74 | // New regex to keep this readable... more or less. |
75 | $ogRegexReverse = '#<meta[^>]+content=["\'](.*?)["\'][^>]+(?:'. $properties .')=(?:'. $orCondition .').*?>#'; | 75 | $ogRegexReverse = '#<meta[^>]+content=(["\'])([^\1]*?)\1[^>]+(?:'. $properties .')=(?:'. $orCondition .').*?>#'; |
76 | 76 | ||
77 | if (preg_match($ogRegex, $html, $matches) > 0 | 77 | if (preg_match($ogRegex, $html, $matches) > 0 |
78 | || preg_match($ogRegexReverse, $html, $matches) > 0 | 78 | || preg_match($ogRegexReverse, $html, $matches) > 0 |
79 | ) { | 79 | ) { |
80 | return $matches[1]; | 80 | return $matches[2]; |
81 | } | 81 | } |
82 | 82 | ||
83 | return false; | 83 | return false; |