about | summary | refs | log | tree | commit | diff | homepage
path: root/tests/bookmark
diff options
context:
space:
mode:
author: ArthurHoaro <arthur@hoa.ro> 2020-09-03 17:46:26 +0200
committer: ArthurHoaro <arthur@hoa.ro> 2020-09-03 17:46:26 +0200
commit: 2cd0509b503332b1989f06da45d569d4d2929be5 (patch)
tree: 7aa76192ea42a640b7238114fad1acd31ccc4960 /tests/bookmark
parent: 21163a3329ef19dc6ebadb75d6452ac02fd59ab3 (diff)
download: Shaarli-2cd0509b503332b1989f06da45d569d4d2929be5.tar.gz
Shaarli-2cd0509b503332b1989f06da45d569d4d2929be5.tar.zst
Shaarli-2cd0509b503332b1989f06da45d569d4d2929be5.zip
Improve regex to extract HTML metadata (title, description, etc.)
Also added a bunch of tests to cover more use cases. Fixes #1375
Diffstat (limited to 'tests/bookmark')
-rw-r--r-- tests/bookmark/LinkUtilsTest.php | 89
1 files changed, 89 insertions, 0 deletions
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php
index 7d4a7b89..cc7819bc 100644
--- a/tests/bookmark/LinkUtilsTest.php
+++ b/tests/bookmark/LinkUtilsTest.php
@@ -81,8 +81,78 @@ class LinkUtilsTest extends TestCase
81 public function testHtmlExtractExistentNameTag() 81 public function testHtmlExtractExistentNameTag()
82 { 82 {
83 $description = 'Bob and Alice share cookies.'; 83 $description = 'Bob and Alice share cookies.';
84
85 // Simple one line
84 $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>'; 86 $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>';
85 $this->assertEquals($description, html_extract_tag('description', $html)); 87 $this->assertEquals($description, html_extract_tag('description', $html));
88
89 // Simple OpenGraph
90 $html = '<meta property="og:description" content="' . $description . '">';
91 $this->assertEquals($description, html_extract_tag('description', $html));
92
93 // Simple reversed OpenGraph
94 $html = '<meta content="' . $description . '" property="og:description">';
95 $this->assertEquals($description, html_extract_tag('description', $html));
96
97 // ItemProp OpenGraph
98 $html = '<meta itemprop="og:description" content="' . $description . '">';
99 $this->assertEquals($description, html_extract_tag('description', $html));
100
101 // OpenGraph without quotes
102 $html = '<meta property=og:description content="' . $description . '">';
103 $this->assertEquals($description, html_extract_tag('description', $html));
104
105 // OpenGraph reversed without quotes
106 $html = '<meta content="' . $description . '" property=og:description>';
107 $this->assertEquals($description, html_extract_tag('description', $html));
108
109 // OpenGraph with noise
110 $html = '<meta tag1="content1" property="og:description" tag2="content2" content="' .
111 $description . '" tag3="content3">';
112 $this->assertEquals($description, html_extract_tag('description', $html));
113
114 // OpenGraph reversed with noise
115 $html = '<meta tag1="content1" content="' . $description . '" ' .
116 'tag3="content3" tag2="content2" property="og:description">';
117 $this->assertEquals($description, html_extract_tag('description', $html));
118
119 // OpenGraph multiple properties start
120 $html = '<meta property="unrelated og:description" content="' . $description . '">';
121 $this->assertEquals($description, html_extract_tag('description', $html));
122
123 // OpenGraph multiple properties end
124 $html = '<meta property="og:description unrelated" content="' . $description . '">';
125 $this->assertEquals($description, html_extract_tag('description', $html));
126
127 // OpenGraph multiple properties both end
128 $html = '<meta property="og:unrelated1 og:description og:unrelated2" content="' . $description . '">';
129 $this->assertEquals($description, html_extract_tag('description', $html));
130
131 // OpenGraph multiple properties both end with noise
132 $html = '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '.
133 'tag2="content2" content="' . $description . '" tag3="content3">';
134 $this->assertEquals($description, html_extract_tag('description', $html));
135
136 // OpenGraph reversed multiple properties start
137 $html = '<meta content="' . $description . '" property="unrelated og:description">';
138 $this->assertEquals($description, html_extract_tag('description', $html));
139
140 // OpenGraph reversed multiple properties end
141 $html = '<meta content="' . $description . '" property="og:description unrelated">';
142 $this->assertEquals($description, html_extract_tag('description', $html));
143
144 // OpenGraph reversed multiple properties both end
145 $html = '<meta content="' . $description . '" property="og:unrelated1 og:description og:unrelated2">';
146 $this->assertEquals($description, html_extract_tag('description', $html));
147
148 // OpenGraph reversed multiple properties both end with noise
149 $html = '<meta tag1="content1" content="' . $description . '" tag2="content2" '.
150 'property="og:unrelated1 og:description og:unrelated2" tag3="content3">';
151 $this->assertEquals($description, html_extract_tag('description', $html));
152
153 // Suggestion from #1375
154 $html = '<meta property="og:description" name="description" content="' . $description . '">';
155 $this->assertEquals($description, html_extract_tag('description', $html));
86 } 156 }
87 157
88 /** 158 /**
@@ -92,6 +162,25 @@ class LinkUtilsTest extends TestCase
92 { 162 {
93 $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>'; 163 $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';
94 $this->assertFalse(html_extract_tag('description', $html)); 164 $this->assertFalse(html_extract_tag('description', $html));
165
166 // Partial meta tag
167 $html = '<meta content="Brief description">';
168 $this->assertFalse(html_extract_tag('description', $html));
169
170 $html = '<meta property="og:description">';
171 $this->assertFalse(html_extract_tag('description', $html));
172
173 $html = '<meta tag1="content1" property="og:description">';
174 $this->assertFalse(html_extract_tag('description', $html));
175
176 $html = '<meta property="og:description" tag1="content1">';
177 $this->assertFalse(html_extract_tag('description', $html));
178
179 $html = '<meta tag1="content1" content="Brief description">';
180 $this->assertFalse(html_extract_tag('description', $html));
181
182 $html = '<meta content="Brief description" tag1="content1">';
183 $this->assertFalse(html_extract_tag('description', $html));
95 } 184 }
96 185
97 /** 186 /**