diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-10-13 12:26:55 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-13 12:26:55 +0200 |
commit | 458b6b9918ec27154dd45416947bb93bedb97109 (patch) | |
tree | c1c565def0a4fffac5d0556794451c49fc4d52e4 /tests/bookmark | |
parent | 543b16b4f4bbde4e9857490e2175e44b4d941eb3 (diff) | |
parent | 2cd0509b503332b1989f06da45d569d4d2929be5 (diff) | |
download | Shaarli-458b6b9918ec27154dd45416947bb93bedb97109.tar.gz Shaarli-458b6b9918ec27154dd45416947bb93bedb97109.tar.zst Shaarli-458b6b9918ec27154dd45416947bb93bedb97109.zip |
Merge pull request #1540 from ArthurHoaro/fix/metadata-regexes
Improve regex to extract HTML metadata (title, description, etc.)
Diffstat (limited to 'tests/bookmark')
-rw-r--r-- | tests/bookmark/LinkUtilsTest.php | 89 |
1 file changed, 89 insertions, 0 deletions
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php index ef00b92f..29941c8c 100644 --- a/tests/bookmark/LinkUtilsTest.php +++ b/tests/bookmark/LinkUtilsTest.php | |||
@@ -94,8 +94,78 @@ class LinkUtilsTest extends TestCase | |||
94 | public function testHtmlExtractExistentNameTag() | 94 | public function testHtmlExtractExistentNameTag() |
95 | { | 95 | { |
96 | $description = 'Bob and Alice share cookies.'; | 96 | $description = 'Bob and Alice share cookies.'; |
97 | |||
98 | // Simple one line | ||
97 | $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>'; | 99 | $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>'; |
98 | $this->assertEquals($description, html_extract_tag('description', $html)); | 100 | $this->assertEquals($description, html_extract_tag('description', $html)); |
101 | |||
102 | // Simple OpenGraph | ||
103 | $html = '<meta property="og:description" content="' . $description . '">'; | ||
104 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
105 | |||
106 | // Simple reversed OpenGraph | ||
107 | $html = '<meta content="' . $description . '" property="og:description">'; | ||
108 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
109 | |||
110 | // ItemProp OpenGraph | ||
111 | $html = '<meta itemprop="og:description" content="' . $description . '">'; | ||
112 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
113 | |||
114 | // OpenGraph without quotes | ||
115 | $html = '<meta property=og:description content="' . $description . '">'; | ||
116 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
117 | |||
118 | // OpenGraph reversed without quotes | ||
119 | $html = '<meta content="' . $description . '" property=og:description>'; | ||
120 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
121 | |||
122 | // OpenGraph with noise | ||
123 | $html = '<meta tag1="content1" property="og:description" tag2="content2" content="' . | ||
124 | $description . '" tag3="content3">'; | ||
125 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
126 | |||
127 | // OpenGraph reversed with noise | ||
128 | $html = '<meta tag1="content1" content="' . $description . '" ' . | ||
129 | 'tag3="content3" tag2="content2" property="og:description">'; | ||
130 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
131 | |||
132 | // OpenGraph multiple properties start | ||
133 | $html = '<meta property="unrelated og:description" content="' . $description . '">'; | ||
134 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
135 | |||
136 | // OpenGraph multiple properties end | ||
137 | $html = '<meta property="og:description unrelated" content="' . $description . '">'; | ||
138 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
139 | |||
140 | // OpenGraph multiple properties both end | ||
141 | $html = '<meta property="og:unrelated1 og:description og:unrelated2" content="' . $description . '">'; | ||
142 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
143 | |||
144 | // OpenGraph multiple properties both end with noise | ||
145 | $html = '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '. | ||
146 | 'tag2="content2" content="' . $description . '" tag3="content3">'; | ||
147 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
148 | |||
149 | // OpenGraph reversed multiple properties start | ||
150 | $html = '<meta content="' . $description . '" property="unrelated og:description">'; | ||
151 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
152 | |||
153 | // OpenGraph reversed multiple properties end | ||
154 | $html = '<meta content="' . $description . '" property="og:description unrelated">'; | ||
155 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
156 | |||
157 | // OpenGraph reversed multiple properties both end | ||
158 | $html = '<meta content="' . $description . '" property="og:unrelated1 og:description og:unrelated2">'; | ||
159 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
160 | |||
161 | // OpenGraph reversed multiple properties both end with noise | ||
162 | $html = '<meta tag1="content1" content="' . $description . '" tag2="content2" '. | ||
163 | 'property="og:unrelated1 og:description og:unrelated2" tag3="content3">'; | ||
164 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
165 | |||
166 | // Suggestion from #1375 | ||
167 | $html = '<meta property="og:description" name="description" content="' . $description . '">'; | ||
168 | $this->assertEquals($description, html_extract_tag('description', $html)); | ||
99 | } | 169 | } |
100 | 170 | ||
101 | /** | 171 | /** |
@@ -105,6 +175,25 @@ class LinkUtilsTest extends TestCase | |||
105 | { | 175 | { |
106 | $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>'; | 176 | $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>'; |
107 | $this->assertFalse(html_extract_tag('description', $html)); | 177 | $this->assertFalse(html_extract_tag('description', $html)); |
178 | |||
179 | // Partial meta tag | ||
180 | $html = '<meta content="Brief description">'; | ||
181 | $this->assertFalse(html_extract_tag('description', $html)); | ||
182 | |||
183 | $html = '<meta property="og:description">'; | ||
184 | $this->assertFalse(html_extract_tag('description', $html)); | ||
185 | |||
186 | $html = '<meta tag1="content1" property="og:description">'; | ||
187 | $this->assertFalse(html_extract_tag('description', $html)); | ||
188 | |||
189 | $html = '<meta property="og:description" tag1="content1">'; | ||
190 | $this->assertFalse(html_extract_tag('description', $html)); | ||
191 | |||
192 | $html = '<meta tag1="content1" content="Brief description">'; | ||
193 | $this->assertFalse(html_extract_tag('description', $html)); | ||
194 | |||
195 | $html = '<meta content="Brief description" tag1="content1">'; | ||
196 | $this->assertFalse(html_extract_tag('description', $html)); | ||
108 | } | 197 | } |
109 | 198 | ||
110 | /** | 199 | /** |