diff options
-rw-r--r-- | application/bookmark/LinkUtils.php | 6 | ||||
-rw-r--r-- | tests/bookmark/LinkUtilsTest.php | 89 |
2 files changed, 93 insertions, 2 deletions
diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php index e7af4d55..faf5dbfd 100644 --- a/application/bookmark/LinkUtils.php +++ b/application/bookmark/LinkUtils.php | |||
@@ -66,11 +66,13 @@ function html_extract_tag($tag, $html) | |||
66 | { | 66 | { |
67 | $propertiesKey = ['property', 'name', 'itemprop']; | 67 | $propertiesKey = ['property', 'name', 'itemprop']; |
68 | $properties = implode('|', $propertiesKey); | 68 | $properties = implode('|', $propertiesKey); |
69 | // We need a OR here to accept either 'property=og:noquote' or 'property="og:unrelated og:my-tag"' | ||
70 | $orCondition = '["\']?(?:og:)?'. $tag .'["\']?|["\'][^\'"]*?(?:og:)?' . $tag . '[^\'"]*?[\'"]'; | ||
69 | // Try to retrieve OpenGraph image. | 71 | // Try to retrieve OpenGraph image. |
70 | $ogRegex = '#<meta[^>]+(?:'. $properties .')=["\']?(?:og:)?'. $tag .'["\'\s][^>]*content=["\']?(.*?)["\'/>]#'; | 72 | $ogRegex = '#<meta[^>]+(?:'. $properties .')=(?:'. $orCondition .')[^>]*content=["\'](.*?)["\'].*?>#'; |
71 | // If the attributes are not in the order property => content (e.g. Github) | 73 | // If the attributes are not in the order property => content (e.g. Github) |
72 | // New regex to keep this readable... more or less. | 74 | // New regex to keep this readable... more or less. |
73 | $ogRegexReverse = '#<meta[^>]+content=["\']([^"\']+)[^>]+(?:'. $properties .')=["\']?(?:og)?:'. $tag .'["\'\s/>]#'; | 75 | $ogRegexReverse = '#<meta[^>]+content=["\'](.*?)["\'][^>]+(?:'. $properties .')=(?:'. $orCondition .').*?>#'; |
74 | 76 | ||
75 | if (preg_match($ogRegex, $html, $matches) > 0 | 77 | if (preg_match($ogRegex, $html, $matches) > 0 |
76 | || preg_match($ogRegexReverse, $html, $matches) > 0 | 78 | || preg_match($ogRegexReverse, $html, $matches) > 0 |
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php index ef00b92f..29941c8c 100644 --- a/tests/bookmark/LinkUtilsTest.php +++ b/tests/bookmark/LinkUtilsTest.php | |||
@@ -94,8 +94,78 @@ class LinkUtilsTest extends TestCase | |||
94 | public function testHtmlExtractExistentNameTag() | 94 | public function testHtmlExtractExistentNameTag() |
95 | { | 95 | { |
96 | $description = 'Bob and Alice share cookies.'; | 96 | $description = 'Bob and Alice share cookies.'; |
97 | |||
98 | // Simple one line | ||
97 | $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>'; | 99 | $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>'; |
98 | $this->assertEquals($description, html_extract_tag('description', $html)); | 100 | $this->assertEquals($description, html_extract_tag('description', $html)); |
101 | |||
102 | // Simple OpenGraph | ||
103 | $html = '<meta property="og:description" content="' . $description . '">'; | ||
104 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
105 | |||
106 | // Simple reversed OpenGraph | ||
107 | $html = '<meta content="' . $description . '" property="og:description">'; | ||
108 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
109 | |||
110 | // ItemProp OpenGraph | ||
111 | $html = '<meta itemprop="og:description" content="' . $description . '">'; | ||
112 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
113 | |||
114 | // OpenGraph without quotes | ||
115 | $html = '<meta property=og:description content="' . $description . '">'; | ||
116 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
117 | |||
118 | // OpenGraph reversed without quotes | ||
119 | $html = '<meta content="' . $description . '" property=og:description>'; | ||
120 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
121 | |||
122 | // OpenGraph with noise | ||
123 | $html = '<meta tag1="content1" property="og:description" tag2="content2" content="' . | ||
124 | $description . '" tag3="content3">'; | ||
125 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
126 | |||
127 | // OpenGraph reversed with noise | ||
128 | $html = '<meta tag1="content1" content="' . $description . '" ' . | ||
129 | 'tag3="content3" tag2="content2" property="og:description">'; | ||
130 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
131 | |||
132 | // OpenGraph multiple properties start | ||
133 | $html = '<meta property="unrelated og:description" content="' . $description . '">'; | ||
134 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
135 | |||
136 | // OpenGraph multiple properties end | ||
137 | $html = '<meta property="og:description unrelated" content="' . $description . '">'; | ||
138 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
139 | |||
140 | // OpenGraph multiple properties both end | ||
141 | $html = '<meta property="og:unrelated1 og:description og:unrelated2" content="' . $description . '">'; | ||
142 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
143 | |||
144 | // OpenGraph multiple properties both end with noise | ||
145 | $html = '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '. | ||
146 | 'tag2="content2" content="' . $description . '" tag3="content3">'; | ||
147 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
148 | |||
149 | // OpenGraph reversed multiple properties start | ||
150 | $html = '<meta content="' . $description . '" property="unrelated og:description">'; | ||
151 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
152 | |||
153 | // OpenGraph reversed multiple properties end | ||
154 | $html = '<meta content="' . $description . '" property="og:description unrelated">'; | ||
155 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
156 | |||
157 | // OpenGraph reversed multiple properties both end | ||
158 | $html = '<meta content="' . $description . '" property="og:unrelated1 og:description og:unrelated2">'; | ||
159 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
160 | |||
161 | // OpenGraph reversed multiple properties both end with noise | ||
162 | $html = '<meta tag1="content1" content="' . $description . '" tag2="content2" '. | ||
163 | 'property="og:unrelated1 og:description og:unrelated2" tag3="content3">'; | ||
164 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
165 | |||
166 | // Suggestion from #1375 | ||
167 | $html = '<meta property="og:description" name="description" content="' . $description . '">'; | ||
168 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
99 | } | 169 | } |
100 | 170 | ||
101 | /** | 171 | /** |
@@ -105,6 +175,25 @@ class LinkUtilsTest extends TestCase | |||
105 | { | 175 | { |
106 | $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>'; | 176 | $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>'; |
107 | $this->assertFalse(html_extract_tag('description', $html)); | 177 | $this->assertFalse(html_extract_tag('description', $html)); |
178 | |||
179 | // Partial meta tag | ||
180 | $html = '<meta content="Brief description">'; | ||
181 | $this->assertFalse(html_extract_tag('description', $html)); | ||
182 | |||
183 | $html = '<meta property="og:description">'; | ||
184 | $this->assertFalse(html_extract_tag('description', $html)); | ||
185 | |||
186 | $html = '<meta tag1="content1" property="og:description">'; | ||
187 | $this->assertFalse(html_extract_tag('description', $html)); | ||
188 | |||
189 | $html = '<meta property="og:description" tag1="content1">'; | ||
190 | $this->assertFalse(html_extract_tag('description', $html)); | ||
191 | |||
192 | $html = '<meta tag1="content1" content="Brief description">'; | ||
193 | $this->assertFalse(html_extract_tag('description', $html)); | ||
194 | |||
195 | $html = '<meta content="Brief description" tag1="content1">'; | ||
196 | $this->assertFalse(html_extract_tag('description', $html)); | ||
108 | } | 197 | } |
109 | 198 | ||
110 | /** | 199 | /** |