]> git.immae.eu Git - github/shaarli/Shaarli.git/blob - tests/bookmark/LinkUtilsTest.php
Fix an issue truncating extracted metadata content
[github/shaarli/Shaarli.git] / tests / bookmark / LinkUtilsTest.php
1 <?php
2
3 namespace Shaarli\Bookmark;
4
5 use Shaarli\TestCase;
6
7 require_once 'tests/utils/CurlUtils.php';
8
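/**
 * Unit tests for the link utility functions: title, charset and meta tag
 * extraction, cURL header/download callbacks, text formatting helpers
 * (text2clickable, space2nbsp, hashtag_autolink) and note detection.
 */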
12 class LinkUtilsTest extends TestCase
13 {
/**
 * Test html_extract_title() when the title is found.
 *
 * Covers a document with a single <title> element, and a document holding
 * two <title> elements where the first one is the expected result.
 */
public function testHtmlExtractExistentTitle(): void
{
    $title = 'Read me please.';

    // Single title preceded by an unrelated tag.
    $html = '<html><meta>stuff</meta><title>' . $title . '</title></html>';
    $this->assertEquals($title, html_extract_title($html));

    // Two titles: the assertion expects the first one to be returned.
    $html = '<html><title>' . $title . '</title>blabla<title>another</title></html>';
    $this->assertEquals($title, html_extract_title($html));
}
25
/**
 * Test html_extract_title() when the title is not found.
 */
public function testHtmlExtractNonExistentTitle(): void
{
    // No <title> element at all: boolean false is the expected result.
    $html = '<html><meta>stuff</meta></html>';
    $this->assertFalse(html_extract_title($html));
}
34
/**
 * Test header_extract_charset() when the charset is found.
 *
 * The expected value is the charset name in lowercase.
 */
public function testHeadersExtractExistentCharset(): void
{
    $charset = 'x-MacCroatian';
    $headers = 'text/html; charset=' . $charset;
    $this->assertEquals(strtolower($charset), header_extract_charset($headers));
}
44
/**
 * Test header_extract_charset() when the charset is found with odd quotes.
 *
 * The charset value is wrapped in double, then single quotes, and is
 * immediately followed by another attribute without any separator.
 */
public function testHeadersExtractExistentCharsetWithQuotes(): void
{
    $charset = 'x-MacCroatian';

    // Double-quoted charset value.
    $headers = 'text/html; charset="' . $charset . '"otherstuff="test"';
    $this->assertEquals(strtolower($charset), header_extract_charset($headers));

    // Single-quoted charset value.
    $headers = 'text/html; charset=\'' . $charset . '\'otherstuff="test"';
    $this->assertEquals(strtolower($charset), header_extract_charset($headers));
}
57
/**
 * Test header_extract_charset() when the charset is not found.
 *
 * Covers an empty header value and a content type without charset parameter.
 */
public function testHeadersExtractNonExistentCharset(): void
{
    // Empty header value.
    $headers = '';
    $this->assertFalse(header_extract_charset($headers));

    // Content type only, no charset parameter.
    $headers = 'text/html';
    $this->assertFalse(header_extract_charset($headers));
}
69
/**
 * Test html_extract_charset() when the charset is found.
 *
 * The expected value is the charset name in lowercase.
 */
public function testHtmlExtractExistentCharset(): void
{
    $charset = 'x-MacCroatian';
    $html = '<html><meta>stuff2</meta><meta charset="' . $charset . '"/></html>';
    $this->assertEquals(strtolower($charset), html_extract_charset($html));
}
79
/**
 * Test html_extract_charset() when the charset is not found.
 *
 * Covers a document without charset meta tag and one with an empty charset.
 */
public function testHtmlExtractNonExistentCharset(): void
{
    // No charset attribute anywhere.
    $html = '<html><meta>stuff</meta></html>';
    $this->assertFalse(html_extract_charset($html));

    // Charset attribute present but empty.
    $html = '<html><meta>stuff</meta><meta charset=""/></html>';
    $this->assertFalse(html_extract_charset($html));
}
90
/**
 * Test html_extract_tag() when the requested meta tag is found.
 *
 * Each case is a <meta> markup variant (name=, property="og:…", itemprop,
 * unquoted values, reversed attribute order, extra attributes, multiple
 * space-separated properties) from which the description must be extracted.
 * The case label is used as assertion message so a failure names the variant.
 */
public function testHtmlExtractExistentNameTag(): void
{
    $description = 'Bob and Alice share cookies.';

    $cases = [
        'Simple one line' =>
            '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>',
        'Simple OpenGraph' =>
            '<meta property="og:description" content="' . $description . '">',
        'Simple reversed OpenGraph' =>
            '<meta content="' . $description . '" property="og:description">',
        'ItemProp OpenGraph' =>
            '<meta itemprop="og:description" content="' . $description . '">',
        'OpenGraph without quotes' =>
            '<meta property=og:description content="' . $description . '">',
        'OpenGraph reversed without quotes' =>
            '<meta content="' . $description . '" property=og:description>',
        'OpenGraph with noise' =>
            '<meta tag1="content1" property="og:description" tag2="content2" content="'
            . $description . '" tag3="content3">',
        'OpenGraph reversed with noise' =>
            '<meta tag1="content1" content="' . $description . '" '
            . 'tag3="content3" tag2="content2" property="og:description">',
        'OpenGraph multiple properties start' =>
            '<meta property="unrelated og:description" content="' . $description . '">',
        'OpenGraph multiple properties end' =>
            '<meta property="og:description unrelated" content="' . $description . '">',
        'OpenGraph multiple properties both end' =>
            '<meta property="og:unrelated1 og:description og:unrelated2" content="' . $description . '">',
        'OpenGraph multiple properties both end with noise' =>
            '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '
            . 'tag2="content2" content="' . $description . '" tag3="content3">',
        'OpenGraph reversed multiple properties start' =>
            '<meta content="' . $description . '" property="unrelated og:description">',
        'OpenGraph reversed multiple properties end' =>
            '<meta content="' . $description . '" property="og:description unrelated">',
        'OpenGraph reversed multiple properties both end' =>
            '<meta content="' . $description . '" property="og:unrelated1 og:description og:unrelated2">',
        'OpenGraph reversed multiple properties both end with noise' =>
            '<meta tag1="content1" content="' . $description . '" tag2="content2" '
            . 'property="og:unrelated1 og:description og:unrelated2" tag3="content3">',
        'Suggestion from #1375' =>
            '<meta property="og:description" name="description" content="' . $description . '">',
    ];

    // Cases run in declaration order; the first failing one stops the test.
    foreach ($cases as $label => $html) {
        $this->assertEquals($description, html_extract_tag('description', $html), $label);
    }
}
170
/**
 * Test html_extract_tag() when the content value holds the "other" quote:
 * an apostrophe inside a double-quoted content attribute, then double quotes
 * inside a single-quoted content attribute.
 */
public function testHtmlExtractExistentNameTagWithMixedQuotes(): void
{
    // Apostrophe inside double-quoted content.
    $withApostrophe = 'Bob and Alice share M&M\'s.';
    $doubleQuotedMarkup = [
        '<meta property="og:description" content="' . $withApostrophe . '">',
        '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '
            . 'tag2="content2" content="' . $withApostrophe . '" tag3="content3">',
        '<meta property="og:description" name="description" content="' . $withApostrophe . '">',
    ];
    foreach ($doubleQuotedMarkup as $markup) {
        self::assertEquals($withApostrophe, html_extract_tag('description', $markup));
    }

    // Double quotes inside single-quoted content.
    $withDoubleQuotes = 'Bob and Alice share "cookies".';
    $singleQuotedMarkup = [
        '<meta property="og:description" content=\'' . $withDoubleQuotes . '\'>',
        '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '
            . 'tag2="content2" content=\'' . $withDoubleQuotes . '\' tag3="content3">',
        '<meta property="og:description" name="description" content=\'' . $withDoubleQuotes . '\'>',
    ];
    foreach ($singleQuotedMarkup as $markup) {
        self::assertEquals($withDoubleQuotes, html_extract_tag('description', $markup));
    }
}
200
/**
 * Test html_extract_tag() when the requested meta tag is not found.
 *
 * Covers a meta tag with another name, and partial meta tags that carry
 * either the content or the property attribute, but never both.
 */
public function testHtmlExtractNonExistentNameTag(): void
{
    // Meta tag exists, but for a different name.
    $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';
    $this->assertFalse(html_extract_tag('description', $html));

    // Partial meta tag: content without property.
    $html = '<meta content="Brief description">';
    $this->assertFalse(html_extract_tag('description', $html));

    // Partial meta tag: property without content.
    $html = '<meta property="og:description">';
    $this->assertFalse(html_extract_tag('description', $html));

    // Property without content, with an unrelated attribute before or after.
    $html = '<meta tag1="content1" property="og:description">';
    $this->assertFalse(html_extract_tag('description', $html));

    $html = '<meta property="og:description" tag1="content1">';
    $this->assertFalse(html_extract_tag('description', $html));

    // Content without property, with an unrelated attribute before or after.
    $html = '<meta tag1="content1" content="Brief description">';
    $this->assertFalse(html_extract_tag('description', $html));

    $html = '<meta content="Brief description" tag1="content1">';
    $this->assertFalse(html_extract_tag('description', $html));
}
228
/**
 * Test html_extract_tag() when the tag <meta property="og: is found.
 */
public function testHtmlExtractExistentOgTag(): void
{
    $description = 'Bob and Alice share cookies.';
    $html = '<html><meta>stuff2</meta><meta property="og:description" content="' . $description . '"/></html>';
    $this->assertEquals($description, html_extract_tag('description', $html));
}
238
/**
 * Test html_extract_tag() when the tag <meta property="og: is not found.
 */
public function testHtmlExtractNonExistentOgTag(): void
{
    // Only an unrelated "image" meta tag is present.
    $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';
    $this->assertFalse(html_extract_tag('description', $html));
}
247
/**
 * Feed a set of successful response header lines to the header callback.
 *
 * Every call must return an integer, and $charset must end up as 'utf-8'.
 */
public function testCurlHeaderCallbackOk(): void
{
    $headerLines = [
        'HTTP/1.1 200 OK',
        'Server: GitHub.com',
        'Date: Sat, 28 Oct 2017 12:01:33 GMT',
        'Content-Type: text/html; charset=utf-8',
        'Status: 200 OK',
    ];
    // $charset is handed over undefined and read back after the loop.
    $onHeader = get_curl_header_callback($charset, 'ut_curl_getinfo_ok');

    foreach ($headerLines as $line) {
        $returned = $onHeader(null, $line);
        self::assertIsInt($returned);
    }

    self::assertSame('utf-8', $charset);
}
268
/**
 * Run the download callback over two chunks with a preset charset and the
 * final boolean flag set to false.
 *
 * The title of the first chunk is expected, the <title> of the second chunk
 * must be ignored, and description/keywords must stay empty.
 */
public function testCurlDownloadCallbackOk(): void
{
    $charset = 'utf-8';
    $chunks = [
        implode('', [
            'th=device-width">',
            '<title>Refactoring · GitHub</title>',
            '<link rel="search" type="application/opensea',
        ]),
        implode('', [
            '<title>ignored</title>',
            '<meta name="description" content="desc" />',
            '<meta name="keywords" content="key1,key2" />',
        ]),
    ];
    $onDownload = get_curl_download_callback($charset, $title, $desc, $keywords, false);

    foreach ($chunks as $piece) {
        $consumed = $onDownload(null, $piece);
        self::assertSame(strlen($piece), $consumed);
    }

    self::assertSame('utf-8', $charset);
    self::assertSame('Refactoring · GitHub', $title);
    self::assertEmpty($desc);
    self::assertEmpty($keywords);
}
301
/**
 * Test the header callback when no charset can be determined.
 *
 * The callback must return the length of the header line it was given,
 * and $charset must end up as boolean false.
 */
public function testCurlHeaderCallbackNoCharset(): void
{
    $headerLines = ['HTTP/1.1 200 OK'];
    $onHeader = get_curl_header_callback($charset, 'ut_curl_getinfo_no_charset');

    foreach ($headerLines as $line) {
        $returned = $onHeader(null, $line);
        self::assertSame(strlen($line), $returned);
    }

    self::assertFalse($charset);
}
318
/**
 * Run the download callback with a null charset and no charset information
 * in the downloaded chunks.
 *
 * The title must still be extracted while charset, description and keywords
 * stay empty.
 */
public function testCurlDownloadCallbackOkNoCharset(): void
{
    $charset = null;
    $chunks = [
        'end' => implode('', [
            'th=device-width">',
            '<title>Refactoring · GitHub</title>',
            '<link rel="search" type="application/opensea',
        ]),
        implode('', [
            '<title>ignored</title>',
            '<meta name="description" content="desc" />',
            '<meta name="keywords" content="key1,key2" />',
        ]),
    ];
    $onDownload = get_curl_download_callback($charset, $title, $desc, $keywords, false);

    foreach ($chunks as $piece) {
        $consumed = $onDownload(null, $piece);
        self::assertSame(strlen($piece), $consumed);
    }

    self::assertEmpty($charset);
    self::assertEquals('Refactoring · GitHub', $title);
    self::assertEmpty($desc);
    self::assertEmpty($keywords);
}
351
/**
 * Run the download callback with a null charset when the first chunk holds
 * a <meta http-equiv="Content-Type"> tag declaring the charset.
 *
 * $charset must end up as 'utf-8' and the title must be extracted, while
 * description and keywords stay empty.
 */
public function testCurlDownloadCallbackOkHtmlCharset(): void
{
    $charset = null;
    $chunks = [
        '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
        'end' => implode('', [
            'th=device-width">',
            '<title>Refactoring · GitHub</title>',
            '<link rel="search" type="application/opensea',
        ]),
        implode('', [
            '<title>ignored</title>',
            '<meta name="description" content="desc" />',
            '<meta name="keywords" content="key1,key2" />',
        ]),
    ];
    $onDownload = get_curl_download_callback($charset, $title, $desc, $keywords, false);

    foreach ($chunks as $piece) {
        $consumed = $onDownload(null, $piece);
        self::assertSame(strlen($piece), $consumed);
    }

    self::assertEquals('utf-8', $charset);
    self::assertEquals('Refactoring · GitHub', $title);
    self::assertEmpty($desc);
    self::assertEmpty($keywords);
}
384
/**
 * Run the download callback over chunks that contain no <title> element.
 *
 * The charset must be left at 'utf-8' and title, description and keywords
 * must all stay empty.
 */
public function testCurlDownloadCallbackOkNoTitle(): void
{
    $charset = 'utf-8';
    $chunks = [
        'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
        'ignored',
    ];
    $onDownload = get_curl_download_callback($charset, $title, $desc, $keywords, false);

    foreach ($chunks as $piece) {
        $consumed = $onDownload(null, $piece);
        self::assertSame(strlen($piece), $consumed);
    }

    self::assertEquals('utf-8', $charset);
    self::assertEmpty($title);
    self::assertEmpty($desc);
    self::assertEmpty($keywords);
}
413
/**
 * Test the header callback built with the 'ut_curl_getinfo_ct_ko' stub
 * (invalid content type).
 *
 * The callback must return false and leave $charset null.
 */
public function testCurlHeaderCallbackInvalidContentType(): void
{
    $statusLine = 'HTTP/1.1 200 OK';
    $onHeader = get_curl_header_callback($charset, 'ut_curl_getinfo_ct_ko');

    $returned = $onHeader(null, $statusLine);

    self::assertFalse($returned);
    self::assertNull($charset);
}
427
/**
 * Test the header callback built with the 'ut_curl_getinfo_rc_ko' stub
 * (invalid response code).
 *
 * The callback must return false and leave $charset null.
 */
public function testCurlHeaderCallbackInvalidResponseCode(): void
{
    $onHeader = get_curl_header_callback($charset, 'ut_curl_getinfo_rc_ko');

    $returned = $onHeader(null, '');

    self::assertFalse($returned);
    self::assertNull($charset);
}
438
/**
 * Test the header callback built with the 'ut_curl_getinfo_rs_ct_ko' stub
 * (invalid content type and invalid response code).
 *
 * The callback must return false and leave $charset null.
 */
public function testCurlHeaderCallbackInvalidContentTypeAndResponseCode(): void
{
    $onHeader = get_curl_header_callback($charset, 'ut_curl_getinfo_rs_ct_ko');

    $returned = $onHeader(null, '');

    self::assertFalse($returned);
    self::assertNull($charset);
}
449
/**
 * Run the download callback with the retrieve_description option enabled
 * (final argument true).
 *
 * Besides the title, the description and the keywords must be extracted;
 * the keywords are expected space-separated ('key1 key2').
 */
public function testCurlDownloadCallbackOkWithDesc(): void
{
    $charset = 'utf-8';
    $chunks = [
        implode('', [
            'th=device-width">',
            '<title>Refactoring · GitHub</title>',
            '<link rel="search" type="application/opensea',
        ]),
        'end' => implode('', [
            '<title>ignored</title>',
            '<meta name="description" content="link desc" />',
            '<meta name="keywords" content="key1,key2" />',
        ]),
    ];
    $onDownload = get_curl_download_callback($charset, $title, $desc, $keywords, true);

    foreach ($chunks as $piece) {
        $consumed = $onDownload(null, $piece);
        self::assertSame(strlen($piece), $consumed);
    }

    self::assertEquals('utf-8', $charset);
    self::assertEquals('Refactoring · GitHub', $title);
    self::assertEquals('link desc', $desc);
    self::assertEquals('key1 key2', $keywords);
}
481
/**
 * Run the download callback with the retrieve_description option enabled,
 * on a page that defines neither a description nor keywords.
 *
 * The title must be extracted; description and keywords must stay empty.
 */
public function testCurlDownloadCallbackOkWithDescNotFound(): void
{
    $charset = 'utf-8';
    $chunks = [
        implode('', [
            'th=device-width">',
            '<title>Refactoring · GitHub</title>',
            '<link rel="search" type="application/opensea',
        ]),
        'end' => '<title>ignored</title>',
    ];
    $onDownload = get_curl_download_callback(
        $charset,
        $title,
        $desc,
        $keywords,
        true,
        'ut_curl_getinfo_ok'
    );

    foreach ($chunks as $piece) {
        $consumed = $onDownload(null, $piece);
        self::assertSame(strlen($piece), $consumed);
    }

    self::assertEquals('utf-8', $charset);
    self::assertEquals('Refactoring · GitHub', $title);
    self::assertEmpty($desc);
    self::assertEmpty($keywords);
}
513
/**
 * Test text2clickable().
 *
 * A URL surrounded by plain text must be wrapped in an <a> element whose
 * href and label are both the URL; the surrounding text stays untouched.
 */
public function testText2clickable(): void
{
    // URL with a query-like part and a fragment.
    $text = 'stuff http://hello.there/is=someone#here otherstuff';
    $expectedText = 'stuff <a href="http://hello.there/is=someone#here">'
        . 'http://hello.there/is=someone#here</a> otherstuff';
    $processedText = text2clickable($text);
    $this->assertEquals($expectedText, $processedText);

    // Parentheses are part of the URL.
    $text = 'stuff http://hello.there/is=someone#here(please) otherstuff';
    $expectedText = 'stuff <a href="http://hello.there/is=someone#here(please)">'
        . 'http://hello.there/is=someone#here(please)</a> otherstuff';
    $processedText = text2clickable($text);
    $this->assertEquals($expectedText, $processedText);

    // Ampersand after the parentheses is part of the URL too.
    $text = 'stuff http://hello.there/is=someone#here(please)&no otherstuff';
    $expectedText = 'stuff <a href="http://hello.there/is=someone#here(please)&no">'
        . 'http://hello.there/is=someone#here(please)&no</a> otherstuff';
    $processedText = text2clickable($text);
    $this->assertEquals($expectedText, $processedText);
}
538
/**
 * Test space2nbsp().
 *
 * Runs of several spaces and a single leading space on a line are expected
 * to come back with "&nbsp;" entities, as spelled out in $expectedText.
 */
public function testSpace2nbsp(): void
{
    // NOTE(review): the input previously held single spaces only, which cannot
    // line up with the "&nbsp;" entities of the expected text. The space runs
    // below are rebuilt from $expectedText by mapping every "&nbsp;" back to
    // one space — confirm against the upstream test.
    $text = '  Are you   thrilled  by flags   ?' . PHP_EOL . ' Really?';
    $expectedText = '&nbsp; Are you &nbsp; thrilled &nbsp;by flags &nbsp; ?' . PHP_EOL . '&nbsp;Really?';
    $processedText = space2nbsp($text);
    $this->assertEquals($expectedText, $processedText);
}
549
/**
 * Test hashtags auto-link.
 *
 * Links are expected for #hashtag, #ашок, #カタカナ and #hashtag_hashtag.
 * No link is expected for "hashtag-nothashtag" as a whole, and neither a
 * raw " #hashtag" nor a linked "#nothashtag" may remain in the output.
 */
public function testHashtagAutolink(): void
{
    $index = 'http://domain.tld/';
    // Single-quoted string: every "\n" stays a literal backslash followed by
    // "n"; the actual line breaks are the newlines embedded in the literal.
    $rawDescription = '#hashtag\n
# nothashtag\n
test#nothashtag #hashtag \#nothashtag\n
test #hashtag #hashtag test #hashtag.test\n
#hashtag #hashtag-nothashtag #hashtag_hashtag\n
What is #ашок anyway?\n
カタカナ #カタカナ」カタカナ\n';
    $autolinkedDescription = hashtag_autolink($rawDescription, $index);

    $this->assertContainsPolyfill($this->getHashtagLink('hashtag', $index), $autolinkedDescription);
    $this->assertNotContainsPolyfill(' #hashtag', $autolinkedDescription);
    $this->assertNotContainsPolyfill('>#nothashtag', $autolinkedDescription);
    $this->assertContainsPolyfill($this->getHashtagLink('ашок', $index), $autolinkedDescription);
    $this->assertContainsPolyfill($this->getHashtagLink('カタカナ', $index), $autolinkedDescription);
    $this->assertContainsPolyfill($this->getHashtagLink('hashtag_hashtag', $index), $autolinkedDescription);
    $this->assertNotContainsPolyfill($this->getHashtagLink('hashtag-nothashtag', $index), $autolinkedDescription);
}
573
/**
 * Test hashtags auto-link without index URL.
 *
 * hashtag_autolink() is called with the description only; the expected link
 * is the one built by getHashtagLink() with its default (empty) index.
 */
public function testHashtagAutolinkNoIndex(): void
{
    $rawDescription = 'blabla #hashtag x#nothashtag';
    $autolinkedDescription = hashtag_autolink($rawDescription);

    $this->assertContainsPolyfill($this->getHashtagLink('hashtag'), $autolinkedDescription);
    $this->assertNotContainsPolyfill(' #hashtag', $autolinkedDescription);
    $this->assertNotContainsPolyfill('>#nothashtag', $autolinkedDescription);
}
586
/**
 * Test is_note() with note URLs.
 *
 * Every sample starts with a question mark.
 */
public function testIsNote(): void
{
    $this->assertTrue(is_note('?'));
    $this->assertTrue(is_note('?abcDEf'));
    $this->assertTrue(is_note('?_abcDEf#123'));
}
596
/**
 * Test is_note() with non note URLs.
 *
 * Covers an empty string, a plain word, and a full URL that merely
 * contains a question mark.
 */
public function testIsNotNote(): void
{
    $this->assertFalse(is_note(''));
    $this->assertFalse(is_note('nope'));
    $this->assertFalse(is_note('https://github.com/shaarli/Shaarli/?hi'));
}
606
/**
 * Util function to build a hashtag link.
 *
 * @param string $hashtag Hashtag name.
 * @param string $index   Index URL, prepended to the link target.
 *
 * @return string HTML hashtag link.
 */
private function getHashtagLink(string $hashtag, string $index = ''): string
{
    // "$1" is a placeholder; every occurrence is replaced by the hashtag
    // once the template has been assembled.
    $hashtagLink = '<a href="' . $index . './add-tag/$1" title="Hashtag $1">#$1</a>';

    return str_replace('$1', $hashtag, $hashtagLink);
}
620 }