aboutsummaryrefslogtreecommitdiffhomepage
path: root/tests
diff options
context:
space:
mode:
authorArthurHoaro <arthur@hoa.ro>2020-10-13 12:26:55 +0200
committerGitHub <noreply@github.com>2020-10-13 12:26:55 +0200
commit458b6b9918ec27154dd45416947bb93bedb97109 (patch)
treec1c565def0a4fffac5d0556794451c49fc4d52e4 /tests
parent543b16b4f4bbde4e9857490e2175e44b4d941eb3 (diff)
parent2cd0509b503332b1989f06da45d569d4d2929be5 (diff)
downloadShaarli-458b6b9918ec27154dd45416947bb93bedb97109.tar.gz
Shaarli-458b6b9918ec27154dd45416947bb93bedb97109.tar.zst
Shaarli-458b6b9918ec27154dd45416947bb93bedb97109.zip
Merge pull request #1540 from ArthurHoaro/fix/metadata-regexes
Improve regex to extract HTML metadata (title, description, etc.)
Diffstat (limited to 'tests')
-rw-r--r--tests/bookmark/LinkUtilsTest.php89
1 files changed, 89 insertions, 0 deletions
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php
index ef00b92f..29941c8c 100644
--- a/tests/bookmark/LinkUtilsTest.php
+++ b/tests/bookmark/LinkUtilsTest.php
@@ -94,8 +94,78 @@ class LinkUtilsTest extends TestCase
94 public function testHtmlExtractExistentNameTag() 94 public function testHtmlExtractExistentNameTag()
95 { 95 {
96 $description = 'Bob and Alice share cookies.'; 96 $description = 'Bob and Alice share cookies.';
97
98 // Simple one line
97 $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>'; 99 $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>';
98 $this->assertEquals($description, html_extract_tag('description', $html)); 100 $this->assertEquals($description, html_extract_tag('description', $html));
101
102 // Simple OpenGraph
103 $html = '<meta property="og:description" content="' . $description . '">';
104 $this->assertEquals($description, html_extract_tag('description', $html));
105
106 // Simple reversed OpenGraph
107 $html = '<meta content="' . $description . '" property="og:description">';
108 $this->assertEquals($description, html_extract_tag('description', $html));
109
110 // ItemProp OpenGraph
111 $html = '<meta itemprop="og:description" content="' . $description . '">';
112 $this->assertEquals($description, html_extract_tag('description', $html));
113
114 // OpenGraph without quotes
115 $html = '<meta property=og:description content="' . $description . '">';
116 $this->assertEquals($description, html_extract_tag('description', $html));
117
118 // OpenGraph reversed without quotes
119 $html = '<meta content="' . $description . '" property=og:description>';
120 $this->assertEquals($description, html_extract_tag('description', $html));
121
122 // OpenGraph with noise
123 $html = '<meta tag1="content1" property="og:description" tag2="content2" content="' .
124 $description . '" tag3="content3">';
125 $this->assertEquals($description, html_extract_tag('description', $html));
126
127 // OpenGraph reversed with noise
128 $html = '<meta tag1="content1" content="' . $description . '" ' .
129 'tag3="content3" tag2="content2" property="og:description">';
130 $this->assertEquals($description, html_extract_tag('description', $html));
131
132 // OpenGraph multiple properties start
133 $html = '<meta property="unrelated og:description" content="' . $description . '">';
134 $this->assertEquals($description, html_extract_tag('description', $html));
135
136 // OpenGraph multiple properties end
137 $html = '<meta property="og:description unrelated" content="' . $description . '">';
138 $this->assertEquals($description, html_extract_tag('description', $html));
139
140 // OpenGraph multiple properties both end
141 $html = '<meta property="og:unrelated1 og:description og:unrelated2" content="' . $description . '">';
142 $this->assertEquals($description, html_extract_tag('description', $html));
143
144 // OpenGraph multiple properties both end with noise
145 $html = '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '.
146 'tag2="content2" content="' . $description . '" tag3="content3">';
147 $this->assertEquals($description, html_extract_tag('description', $html));
148
149 // OpenGraph reversed multiple properties start
150 $html = '<meta content="' . $description . '" property="unrelated og:description">';
151 $this->assertEquals($description, html_extract_tag('description', $html));
152
153 // OpenGraph reversed multiple properties end
154 $html = '<meta content="' . $description . '" property="og:description unrelated">';
155 $this->assertEquals($description, html_extract_tag('description', $html));
156
157 // OpenGraph reversed multiple properties both end
158 $html = '<meta content="' . $description . '" property="og:unrelated1 og:description og:unrelated2">';
159 $this->assertEquals($description, html_extract_tag('description', $html));
160
161 // OpenGraph reversed multiple properties both end with noise
162 $html = '<meta tag1="content1" content="' . $description . '" tag2="content2" '.
163 'property="og:unrelated1 og:description og:unrelated2" tag3="content3">';
164 $this->assertEquals($description, html_extract_tag('description', $html));
165
166 // Suggestion from #1375
167 $html = '<meta property="og:description" name="description" content="' . $description . '">';
168 $this->assertEquals($description, html_extract_tag('description', $html));
99 } 169 }
100 170
101 /** 171 /**
@@ -105,6 +175,25 @@ class LinkUtilsTest extends TestCase
105 { 175 {
106 $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>'; 176 $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';
107 $this->assertFalse(html_extract_tag('description', $html)); 177 $this->assertFalse(html_extract_tag('description', $html));
178
179 // Partial meta tag
180 $html = '<meta content="Brief description">';
181 $this->assertFalse(html_extract_tag('description', $html));
182
183 $html = '<meta property="og:description">';
184 $this->assertFalse(html_extract_tag('description', $html));
185
186 $html = '<meta tag1="content1" property="og:description">';
187 $this->assertFalse(html_extract_tag('description', $html));
188
189 $html = '<meta property="og:description" tag1="content1">';
190 $this->assertFalse(html_extract_tag('description', $html));
191
192 $html = '<meta tag1="content1" content="Brief description">';
193 $this->assertFalse(html_extract_tag('description', $html));
194
195 $html = '<meta content="Brief description" tag1="content1">';
196 $this->assertFalse(html_extract_tag('description', $html));
108 } 197 }
109 198
110 /** 199 /**