]> git.immae.eu Git - github/shaarli/Shaarli.git/blob - tests/bookmark/LinkUtilsTest.php
3321242fae07f018c91b98b2f40067fa2d6a9e22
[github/shaarli/Shaarli.git] / tests / bookmark / LinkUtilsTest.php
1 <?php
2
3 namespace Shaarli\Bookmark;
4
5 use Shaarli\TestCase;
6
7 require_once 'tests/utils/CurlUtils.php';
8
9 /**
10 * Class LinkUtilsTest.
11 */
12 class LinkUtilsTest extends TestCase
13 {
/**
 * html_extract_title() must return the text of the first <title> element.
 */
public function testHtmlExtractExistentTitle()
{
    $expected = 'Read me please.';

    $documents = [
        // Title preceded by an unrelated element.
        '<html><meta>stuff</meta><title>' . $expected . '</title></html>',
        // Two titles in the same document: the second one must not win.
        '<html><title>' . $expected . '</title>blabla<title>another</title></html>',
    ];

    foreach ($documents as $document) {
        $this->assertEquals($expected, html_extract_title($document));
    }
}
25
/**
 * html_extract_title() must return false for markup without a <title> element.
 */
public function testHtmlExtractNonExistentTitle()
{
    $extracted = html_extract_title('<html><meta>stuff</meta></html>');

    $this->assertFalse($extracted);
}
34
/**
 * header_extract_charset() must return the charset of a Content-Type value, lower-cased.
 */
public function testHeadersExtractExistentCharset()
{
    $declared = 'x-MacCroatian';

    $extracted = header_extract_charset('text/html; charset=' . $declared);

    // The mixed-case input is expected back in lower case.
    $this->assertEquals(strtolower($declared), $extracted);
}
44
/**
 * header_extract_charset() must cope with a quoted charset that is immediately
 * followed by another parameter, for both double and single quotes.
 */
public function testHeadersExtractExistentCharsetWithQuotes()
{
    $declared = 'x-MacCroatian';
    $expected = strtolower($declared);

    // Double quotes first, then single quotes.
    foreach (['"', '\''] as $quote) {
        $header = 'text/html; charset=' . $quote . $declared . $quote . 'otherstuff="test"';
        $this->assertEquals($expected, header_extract_charset($header));
    }
}
57
/**
 * header_extract_charset() must return false when no charset is declared.
 */
public function testHeadersExtractNonExistentCharset()
{
    // An empty value, then a media type without any parameter.
    foreach (['', 'text/html'] as $header) {
        $this->assertFalse(header_extract_charset($header));
    }
}
69
/**
 * html_extract_charset() must return the value of <meta charset="...">, lower-cased.
 */
public function testHtmlExtractExistentCharset()
{
    $declared = 'x-MacCroatian';
    $document = '<html><meta>stuff2</meta><meta charset="' . $declared . '"/></html>';

    $extracted = html_extract_charset($document);

    $this->assertEquals(strtolower($declared), $extracted);
}
79
/**
 * html_extract_charset() must return false when the charset is missing or empty.
 */
public function testHtmlExtractNonExistentCharset()
{
    $documents = [
        // No charset attribute at all.
        '<html><meta>stuff</meta></html>',
        // Attribute present but empty.
        '<html><meta>stuff</meta><meta charset=""/></html>',
    ];

    foreach ($documents as $document) {
        $this->assertFalse(html_extract_charset($document));
    }
}
90
/**
 * html_extract_tag() must find the "description" content in every supported
 * <meta> layout: name=, property=/itemprop= with an "og:" prefix, quoted or
 * unquoted, in either attribute order, with extra attributes or extra
 * property values around it.
 */
public function testHtmlExtractExistentNameTag()
{
    $expected = 'Bob and Alice share cookies.';

    $fixtures = [
        // Simple one line
        '<html><meta>stuff2</meta><meta name="description" content="' . $expected . '"/></html>',

        // Simple OpenGraph
        '<meta property="og:description" content="' . $expected . '">',

        // Simple reversed OpenGraph
        '<meta content="' . $expected . '" property="og:description">',

        // ItemProp OpenGraph
        '<meta itemprop="og:description" content="' . $expected . '">',

        // OpenGraph without quotes
        '<meta property=og:description content="' . $expected . '">',

        // OpenGraph reversed without quotes
        '<meta content="' . $expected . '" property=og:description>',

        // OpenGraph with noise
        '<meta tag1="content1" property="og:description" tag2="content2" content="'
            . $expected . '" tag3="content3">',

        // OpenGraph reversed with noise
        '<meta tag1="content1" content="' . $expected . '" '
            . 'tag3="content3" tag2="content2" property="og:description">',

        // OpenGraph multiple properties start
        '<meta property="unrelated og:description" content="' . $expected . '">',

        // OpenGraph multiple properties end
        '<meta property="og:description unrelated" content="' . $expected . '">',

        // OpenGraph multiple properties both end
        '<meta property="og:unrelated1 og:description og:unrelated2" content="' . $expected . '">',

        // OpenGraph multiple properties both end with noise
        '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '
            . 'tag2="content2" content="' . $expected . '" tag3="content3">',

        // OpenGraph reversed multiple properties start
        '<meta content="' . $expected . '" property="unrelated og:description">',

        // OpenGraph reversed multiple properties end
        '<meta content="' . $expected . '" property="og:description unrelated">',

        // OpenGraph reversed multiple properties both end
        '<meta content="' . $expected . '" property="og:unrelated1 og:description og:unrelated2">',

        // OpenGraph reversed multiple properties both end with noise
        '<meta tag1="content1" content="' . $expected . '" tag2="content2" '
            . 'property="og:unrelated1 og:description og:unrelated2" tag3="content3">',

        // Suggestion from #1375
        '<meta property="og:description" name="description" content="' . $expected . '">',
    ];

    // Fixtures are checked in declaration order; the first mismatch aborts the test.
    foreach ($fixtures as $markup) {
        $this->assertEquals($expected, html_extract_tag('description', $markup));
    }
}
170
/**
 * html_extract_tag() must return false when the requested tag is absent or
 * when the <meta> element only carries half of the property/content pair.
 */
public function testHtmlExtractNonExistentNameTag()
{
    $fixtures = [
        // A complete meta tag, but for another name.
        '<html><meta>stuff2</meta><meta name="image" content="img"/></html>',

        // Partial meta tags: content without property, or property without content.
        '<meta content="Brief description">',
        '<meta property="og:description">',
        '<meta tag1="content1" property="og:description">',
        '<meta property="og:description" tag1="content1">',
        '<meta tag1="content1" content="Brief description">',
        '<meta content="Brief description" tag1="content1">',
    ];

    foreach ($fixtures as $markup) {
        $this->assertFalse(html_extract_tag('description', $markup));
    }
}
198
/**
 * html_extract_tag() must read the content of a <meta property="og:..."> tag
 * embedded in a larger document.
 */
public function testHtmlExtractExistentOgTag()
{
    $expected = 'Bob and Alice share cookies.';
    $document = '<html><meta>stuff2</meta><meta property="og:description" content="' . $expected . '"/></html>';

    $extracted = html_extract_tag('description', $document);

    $this->assertEquals($expected, $extracted);
}
208
/**
 * html_extract_tag() must return false when the document has no matching
 * description tag (the only complete meta tag here is name="image").
 */
public function testHtmlExtractNonExistentOgTag()
{
    $extracted = html_extract_tag(
        'description',
        '<html><meta>stuff2</meta><meta name="image" content="img"/></html>'
    );

    $this->assertFalse($extracted);
}
217
/**
 * The header callback must accept every header line (integer return value)
 * and leave the charset set to "utf-8" once all lines have been fed.
 *
 * 'ut_curl_getinfo_ok' is a curl_getinfo() stand-in, presumably declared in
 * tests/utils/CurlUtils.php.
 */
public function testCurlHeaderCallbackOk(): void
{
    $headerLines = [
        'HTTP/1.1 200 OK',
        'Server: GitHub.com',
        'Date: Sat, 28 Oct 2017 12:01:33 GMT',
        'Content-Type: text/html; charset=utf-8',
        'Status: 200 OK',
    ];

    // $charset is created here by the by-reference parameter.
    $onHeader = get_curl_header_callback($charset, 'ut_curl_getinfo_ok');

    foreach ($headerLines as $line) {
        $returned = $onHeader(null, $line);
        $this->assertIsInt($returned);
    }

    $this->assertSame('utf-8', $charset);
}
238
/**
 * The download callback, with description retrieval disabled, must consume
 * every chunk in full, keep the first <title> it sees and leave description
 * and keywords empty.
 */
public function testCurlDownloadCallbackOk(): void
{
    $charset = 'utf-8';

    $firstChunk = 'th=device-width">'
        . '<title>Refactoring · GitHub</title>'
        . '<link rel="search" type="application/opensea';
    $secondChunk = '<title>ignored</title>'
        . '<meta name="description" content="desc" />'
        . '<meta name="keywords" content="key1,key2" />';

    // $title, $desc and $keywords are output parameters filled by the callback.
    $onData = get_curl_download_callback($charset, $title, $desc, $keywords, false);

    foreach ([$firstChunk, $secondChunk] as $chunk) {
        $consumed = $onData(null, $chunk);
        $this->assertSame(strlen($chunk), $consumed);
    }

    $this->assertSame('utf-8', $charset);
    $this->assertSame('Refactoring · GitHub', $title);
    $this->assertEmpty($desc);
    $this->assertEmpty($keywords);
}
271
/**
 * The header callback must still consume the header line when the stubbed
 * curl info ('ut_curl_getinfo_no_charset') is used, and must set the charset
 * to false.
 */
public function testCurlHeaderCallbackNoCharset(): void
{
    $onHeader = get_curl_header_callback($charset, 'ut_curl_getinfo_no_charset');

    $statusLine = 'HTTP/1.1 200 OK';
    $consumed = $onHeader(null, $statusLine);

    // The callback reports the number of bytes it handled.
    $this->assertSame(strlen($statusLine), $consumed);
    $this->assertFalse($charset);
}
288
/**
 * The download callback must extract the title even when no charset is known
 * beforehand and none is declared in the page; the charset then stays empty.
 */
public function testCurlDownloadCallbackOkNoCharset(): void
{
    $charset = null;
    $onData = get_curl_download_callback($charset, $title, $desc, $keywords, false);

    // Array keys are not read by this test; only the values are fed, in order.
    $chunks = [
        'end' => 'th=device-width">'
            . '<title>Refactoring · GitHub</title>'
            . '<link rel="search" type="application/opensea',
        '<title>ignored</title>'
            . '<meta name="description" content="desc" />'
            . '<meta name="keywords" content="key1,key2" />',
    ];

    foreach ($chunks as $chunk) {
        $consumed = $onData(null, $chunk);
        static::assertSame(strlen($chunk), $consumed);
    }

    static::assertEmpty($charset);
    static::assertEquals('Refactoring · GitHub', $title);
    static::assertEmpty($desc);
    static::assertEmpty($keywords);
}
321
/**
 * The download callback must pick the charset up from an
 * http-equiv Content-Type <meta> tag when none was known beforehand.
 */
public function testCurlDownloadCallbackOkHtmlCharset(): void
{
    $charset = null;
    $onData = get_curl_download_callback($charset, $title, $desc, $keywords, false);

    // Array keys are not read by this test; only the values are fed, in order.
    $chunks = [
        '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
        'end' => 'th=device-width">'
            . '<title>Refactoring · GitHub</title>'
            . '<link rel="search" type="application/opensea',
        '<title>ignored</title>'
            . '<meta name="description" content="desc" />'
            . '<meta name="keywords" content="key1,key2" />',
    ];

    foreach ($chunks as $chunk) {
        $consumed = $onData(null, $chunk);
        static::assertSame(strlen($chunk), $consumed);
    }

    static::assertEquals('utf-8', $charset);
    static::assertEquals('Refactoring · GitHub', $title);
    static::assertEmpty($desc);
    static::assertEmpty($keywords);
}
354
/**
 * The download callback must consume chunks that contain no <title> element
 * and leave title, description and keywords empty.
 */
public function testCurlDownloadCallbackOkNoTitle(): void
{
    $charset = 'utf-8';
    $onData = get_curl_download_callback($charset, $title, $desc, $keywords, false);

    // Array keys are not read by this test; only the values are fed, in order.
    $chunks = [
        'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
        'ignored',
    ];

    foreach ($chunks as $chunk) {
        $consumed = $onData(null, $chunk);
        static::assertSame(strlen($chunk), $consumed);
    }

    static::assertEquals('utf-8', $charset);
    static::assertEmpty($title);
    static::assertEmpty($desc);
    static::assertEmpty($keywords);
}
383
/**
 * The header callback must return false and leave the charset unset when the
 * stubbed curl info ('ut_curl_getinfo_ct_ko') reports an invalid content type.
 */
public function testCurlHeaderCallbackInvalidContentType(): void
{
    $onHeader = get_curl_header_callback($charset, 'ut_curl_getinfo_ct_ko');

    $result = $onHeader(null, 'HTTP/1.1 200 OK');

    $this->assertFalse($result);
    $this->assertNull($charset);
}
397
/**
 * The header callback must return false and leave the charset unset when the
 * stubbed curl info ('ut_curl_getinfo_rc_ko') reports an invalid response code.
 */
public function testCurlHeaderCallbackInvalidResponseCode(): void
{
    $onHeader = get_curl_header_callback($charset, 'ut_curl_getinfo_rc_ko');

    $result = $onHeader(null, '');

    $this->assertFalse($result);
    $this->assertNull($charset);
}
408
/**
 * The header callback must return false and leave the charset unset when the
 * stubbed curl info ('ut_curl_getinfo_rs_ct_ko') reports both an invalid
 * content type and an invalid response code.
 */
public function testCurlHeaderCallbackInvalidContentTypeAndResponseCode(): void
{
    $onHeader = get_curl_header_callback($charset, 'ut_curl_getinfo_rs_ct_ko');

    $result = $onHeader(null, '');

    $this->assertFalse($result);
    $this->assertNull($charset);
}
419
/**
 * With the retrieve_description option enabled, the download callback must
 * also return the meta description and the meta keywords; the comma-separated
 * keywords are expected back separated by a space.
 */
public function testCurlDownloadCallbackOkWithDesc(): void
{
    $charset = 'utf-8';
    $onData = get_curl_download_callback($charset, $title, $desc, $keywords, true);

    // Array keys are not read by this test; only the values are fed, in order.
    $chunks = [
        'th=device-width">'
            . '<title>Refactoring · GitHub</title>'
            . '<link rel="search" type="application/opensea',
        'end' => '<title>ignored</title>'
            . '<meta name="description" content="link desc" />'
            . '<meta name="keywords" content="key1,key2" />',
    ];

    foreach ($chunks as $chunk) {
        $consumed = $onData(null, $chunk);
        static::assertSame(strlen($chunk), $consumed);
    }

    static::assertEquals('utf-8', $charset);
    static::assertEquals('Refactoring · GitHub', $title);
    static::assertEquals('link desc', $desc);
    static::assertEquals('key1 key2', $keywords);
}
451
/**
 * With the retrieve_description option enabled but no description or keywords
 * meta tag in the page, the download callback must still return the title and
 * leave description and keywords empty.
 */
public function testCurlDownloadCallbackOkWithDescNotFound(): void
{
    $charset = 'utf-8';
    $onData = get_curl_download_callback(
        $charset,
        $title,
        $desc,
        $keywords,
        true,
        'ut_curl_getinfo_ok'
    );

    // Array keys are not read by this test; only the values are fed, in order.
    $chunks = [
        'th=device-width">'
            . '<title>Refactoring · GitHub</title>'
            . '<link rel="search" type="application/opensea',
        'end' => '<title>ignored</title>',
    ];

    foreach ($chunks as $chunk) {
        $consumed = $onData(null, $chunk);
        static::assertSame(strlen($chunk), $consumed);
    }

    static::assertEquals('utf-8', $charset);
    static::assertEquals('Refactoring · GitHub', $title);
    static::assertEmpty($desc);
    static::assertEmpty($keywords);
}
483
/**
 * text2clickable() must wrap a URL found in free text in an <a> element whose
 * href and label are both the URL, leaving the surrounding text untouched.
 */
public function testText2clickable()
{
    // Each URL is declared once; the input text and the expected markup are
    // both derived from it so they cannot drift apart.
    $urls = [
        // Plain URL with a fragment.
        'http://hello.there/is=someone#here',
        // Trailing parentheses belong to the URL.
        'http://hello.there/is=someone#here(please)',
        // An ampersand after the parentheses belongs to the URL as well.
        'http://hello.there/is=someone#here(please)&no',
    ];

    foreach ($urls as $url) {
        $text = 'stuff ' . $url . ' otherstuff';
        $expectedText = 'stuff <a href="' . $url . '">' . $url . '</a> otherstuff';

        $this->assertEquals($expectedText, text2clickable($text));
    }
}
508
/**
 * space2nbsp() must turn leading spaces and the extra spaces of a run into
 * &nbsp; entities, on every line of the text.
 */
public function testSpace2nbsp()
{
    // The runs of two and three spaces in the input are deliberate: every
    // "&nbsp;" in the expected string stands for exactly one input space.
    // Do not let an editor or formatter collapse them.
    $text = '  Are you   thrilled  by flags   ?' . PHP_EOL . ' Really?';
    $expectedText = '&nbsp; Are you &nbsp; thrilled &nbsp;by flags &nbsp; ?' . PHP_EOL . '&nbsp;Really?';

    $this->assertEquals($expectedText, space2nbsp($text));
}
519
/**
 * hashtag_autolink() must link real hashtags (including non-Latin ones and
 * ones containing an underscore) and must leave look-alikes alone.
 */
public function testHashtagAutolink()
{
    $indexUrl = 'http://domain.tld/';

    // Single-quoted on purpose: each \n below is a literal backslash followed
    // by "n", and every line also ends with a real line break.
    $description = '#hashtag\n
        # nothashtag\n
        test#nothashtag #hashtag \#nothashtag\n
        test #hashtag #hashtag test #hashtag.test\n
        #hashtag #hashtag-nothashtag #hashtag_hashtag\n
        What is #ашок anyway?\n
        カタカナ #カタカナ」カタカナ\n';

    $linked = hashtag_autolink($description, $indexUrl);

    $this->assertContainsPolyfill($this->getHashtagLink('hashtag', $indexUrl), $linked);
    // No raw " #hashtag" may survive, and "nothashtag" must never be a link label.
    $this->assertNotContainsPolyfill(' #hashtag', $linked);
    $this->assertNotContainsPolyfill('>#nothashtag', $linked);
    $this->assertContainsPolyfill($this->getHashtagLink('ашок', $indexUrl), $linked);
    $this->assertContainsPolyfill($this->getHashtagLink('カタカナ', $indexUrl), $linked);
    $this->assertContainsPolyfill($this->getHashtagLink('hashtag_hashtag', $indexUrl), $linked);
    // A dash ends the hashtag, so "hashtag-nothashtag" as a whole is not linked.
    $this->assertNotContainsPolyfill($this->getHashtagLink('hashtag-nothashtag', $indexUrl), $linked);
}
543
/**
 * hashtag_autolink() called without an index URL must still produce the link,
 * built relative to the current location.
 */
public function testHashtagAutolinkNoIndex()
{
    $linked = hashtag_autolink('blabla #hashtag x#nothashtag');

    $expectedLink = $this->getHashtagLink('hashtag');

    $this->assertContainsPolyfill($expectedLink, $linked);
    $this->assertNotContainsPolyfill(' #hashtag', $linked);
    $this->assertNotContainsPolyfill('>#nothashtag', $linked);
}
556
/**
 * is_note() must accept URLs that start with "?".
 */
public function testIsNote()
{
    $noteUrls = ['?', '?abcDEf', '?_abcDEf#123'];

    foreach ($noteUrls as $url) {
        $this->assertTrue(is_note($url));
    }
}
566
/**
 * is_note() must reject an empty string, plain text and a full URL that
 * merely contains a query string.
 */
public function testIsNotNote()
{
    $regularUrls = ['', 'nope', 'https://github.com/shaarli/Shaarli/?hi'];

    foreach ($regularUrls as $url) {
        $this->assertFalse(is_note($url));
    }
}
576
/**
 * Build the anchor markup the tests expect for a given hashtag.
 *
 * @param string $hashtag Hashtag name, without the leading "#".
 * @param string $index   Index URL prepended to the link target (may be empty).
 *
 * @return string HTML hashtag link.
 */
private function getHashtagLink($hashtag, $index = '')
{
    // "$1" is a plain placeholder here (single-quoted string), substituted below.
    $template = sprintf('<a href="%s./add-tag/$1" title="Hashtag $1">#$1</a>', $index);

    return str_replace('$1', $hashtag, $template);
}
590 }