about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- application/bookmark/LinkUtils.php 6
-rw-r--r-- tests/bookmark/LinkUtilsTest.php 89
2 files changed, 93 insertions, 2 deletions
diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php
index e7af4d55..faf5dbfd 100644
--- a/application/bookmark/LinkUtils.php
+++ b/application/bookmark/LinkUtils.php
@@ -66,11 +66,13 @@ function html_extract_tag($tag, $html)
66{ 66{
67 $propertiesKey = ['property', 'name', 'itemprop']; 67 $propertiesKey = ['property', 'name', 'itemprop'];
68 $properties = implode('|', $propertiesKey); 68 $properties = implode('|', $propertiesKey);
69 // We need an OR here to accept either 'property=og:noquote' or 'property="og:unrelated og:my-tag"'
70 $orCondition = '["\']?(?:og:)?'. $tag .'["\']?|["\'][^\'"]*?(?:og:)?' . $tag . '[^\'"]*?[\'"]';
69 // Try to retrieve OpenGraph image. 71 // Try to retrieve OpenGraph image.
70 $ogRegex = '#<meta[^>]+(?:'. $properties .')=["\']?(?:og:)?'. $tag .'["\'\s][^>]*content=["\']?(.*?)["\'/>]#'; 72 $ogRegex = '#<meta[^>]+(?:'. $properties .')=(?:'. $orCondition .')[^>]*content=["\'](.*?)["\'].*?>#';
71 // If the attributes are not in the order property => content (e.g. Github) 73 // If the attributes are not in the order property => content (e.g. Github)
72 // New regex to keep this readable... more or less. 74 // New regex to keep this readable... more or less.
73 $ogRegexReverse = '#<meta[^>]+content=["\']([^"\']+)[^>]+(?:'. $properties .')=["\']?(?:og)?:'. $tag .'["\'\s/>]#'; 75 $ogRegexReverse = '#<meta[^>]+content=["\'](.*?)["\'][^>]+(?:'. $properties .')=(?:'. $orCondition .').*?>#';
74 76
75 if (preg_match($ogRegex, $html, $matches) > 0 77 if (preg_match($ogRegex, $html, $matches) > 0
76 || preg_match($ogRegexReverse, $html, $matches) > 0 78 || preg_match($ogRegexReverse, $html, $matches) > 0
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php
index ef00b92f..29941c8c 100644
--- a/tests/bookmark/LinkUtilsTest.php
+++ b/tests/bookmark/LinkUtilsTest.php
@@ -94,8 +94,78 @@ class LinkUtilsTest extends TestCase
94 public function testHtmlExtractExistentNameTag() 94 public function testHtmlExtractExistentNameTag()
95 { 95 {
96 $description = 'Bob and Alice share cookies.'; 96 $description = 'Bob and Alice share cookies.';
97
98 // Simple one line
97 $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>'; 99 $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>';
98 $this->assertEquals($description, html_extract_tag('description', $html)); 100 $this->assertEquals($description, html_extract_tag('description', $html));
101
102 // Simple OpenGraph
103 $html = '<meta property="og:description" content="' . $description . '">';
104 $this->assertEquals($description, html_extract_tag('description', $html));
105
106 // Simple reversed OpenGraph
107 $html = '<meta content="' . $description . '" property="og:description">';
108 $this->assertEquals($description, html_extract_tag('description', $html));
109
110 // ItemProp OpenGraph
111 $html = '<meta itemprop="og:description" content="' . $description . '">';
112 $this->assertEquals($description, html_extract_tag('description', $html));
113
114 // OpenGraph without quotes
115 $html = '<meta property=og:description content="' . $description . '">';
116 $this->assertEquals($description, html_extract_tag('description', $html));
117
118 // OpenGraph reversed without quotes
119 $html = '<meta content="' . $description . '" property=og:description>';
120 $this->assertEquals($description, html_extract_tag('description', $html));
121
122 // OpenGraph with noise
123 $html = '<meta tag1="content1" property="og:description" tag2="content2" content="' .
124 $description . '" tag3="content3">';
125 $this->assertEquals($description, html_extract_tag('description', $html));
126
127 // OpenGraph reversed with noise
128 $html = '<meta tag1="content1" content="' . $description . '" ' .
129 'tag3="content3" tag2="content2" property="og:description">';
130 $this->assertEquals($description, html_extract_tag('description', $html));
131
132 // OpenGraph multiple properties start
133 $html = '<meta property="unrelated og:description" content="' . $description . '">';
134 $this->assertEquals($description, html_extract_tag('description', $html));
135
136 // OpenGraph multiple properties end
137 $html = '<meta property="og:description unrelated" content="' . $description . '">';
138 $this->assertEquals($description, html_extract_tag('description', $html));
139
140 // OpenGraph multiple properties on both ends
141 $html = '<meta property="og:unrelated1 og:description og:unrelated2" content="' . $description . '">';
142 $this->assertEquals($description, html_extract_tag('description', $html));
143
144 // OpenGraph multiple properties on both ends, with noise
145 $html = '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '.
146 'tag2="content2" content="' . $description . '" tag3="content3">';
147 $this->assertEquals($description, html_extract_tag('description', $html));
148
149 // OpenGraph reversed multiple properties start
150 $html = '<meta content="' . $description . '" property="unrelated og:description">';
151 $this->assertEquals($description, html_extract_tag('description', $html));
152
153 // OpenGraph reversed multiple properties end
154 $html = '<meta content="' . $description . '" property="og:description unrelated">';
155 $this->assertEquals($description, html_extract_tag('description', $html));
156
157 // OpenGraph reversed multiple properties on both ends
158 $html = '<meta content="' . $description . '" property="og:unrelated1 og:description og:unrelated2">';
159 $this->assertEquals($description, html_extract_tag('description', $html));
160
161 // OpenGraph reversed multiple properties on both ends, with noise
162 $html = '<meta tag1="content1" content="' . $description . '" tag2="content2" '.
163 'property="og:unrelated1 og:description og:unrelated2" tag3="content3">';
164 $this->assertEquals($description, html_extract_tag('description', $html));
165
166 // Suggestion from #1375
167 $html = '<meta property="og:description" name="description" content="' . $description . '">';
168 $this->assertEquals($description, html_extract_tag('description', $html));
99 } 169 }
100 170
101 /** 171 /**
@@ -105,6 +175,25 @@ class LinkUtilsTest extends TestCase
105 { 175 {
106 $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>'; 176 $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';
107 $this->assertFalse(html_extract_tag('description', $html)); 177 $this->assertFalse(html_extract_tag('description', $html));
178
179 // Partial meta tag
180 $html = '<meta content="Brief description">';
181 $this->assertFalse(html_extract_tag('description', $html));
182
183 $html = '<meta property="og:description">';
184 $this->assertFalse(html_extract_tag('description', $html));
185
186 $html = '<meta tag1="content1" property="og:description">';
187 $this->assertFalse(html_extract_tag('description', $html));
188
189 $html = '<meta property="og:description" tag1="content1">';
190 $this->assertFalse(html_extract_tag('description', $html));
191
192 $html = '<meta tag1="content1" content="Brief description">';
193 $this->assertFalse(html_extract_tag('description', $html));
194
195 $html = '<meta content="Brief description" tag1="content1">';
196 $this->assertFalse(html_extract_tag('description', $html));
108 } 197 }
109 198
110 /** 199 /**