diff options
Diffstat (limited to 'application/http/HttpUtils.php')
-rw-r--r-- | application/http/HttpUtils.php | 125 |
1 files changed, 121 insertions, 4 deletions
diff --git a/application/http/HttpUtils.php b/application/http/HttpUtils.php index 2ea9195d..9f414073 100644 --- a/application/http/HttpUtils.php +++ b/application/http/HttpUtils.php | |||
@@ -369,7 +369,11 @@ function server_url($server) | |||
369 | */ | 369 | */ |
370 | function index_url($server) | 370 | function index_url($server) |
371 | { | 371 | { |
372 | $scriptname = $server['SCRIPT_NAME']; | 372 | if (defined('SHAARLI_ROOT_URL') && null !== SHAARLI_ROOT_URL) { |
373 | return rtrim(SHAARLI_ROOT_URL, '/') . '/'; | ||
374 | } | ||
375 | |||
376 | $scriptname = !empty($server['SCRIPT_NAME']) ? $server['SCRIPT_NAME'] : '/'; | ||
373 | if (endsWith($scriptname, 'index.php')) { | 377 | if (endsWith($scriptname, 'index.php')) { |
374 | $scriptname = substr($scriptname, 0, -9); | 378 | $scriptname = substr($scriptname, 0, -9); |
375 | } | 379 | } |
@@ -377,7 +381,7 @@ function index_url($server) | |||
377 | } | 381 | } |
378 | 382 | ||
379 | /** | 383 | /** |
380 | * Returns the absolute URL of the current script, with the query | 384 | * Returns the absolute URL of the current script, with current route and query |
381 | * | 385 | * |
382 | * If the resource is "index.php", then it is removed (for better-looking URLs) | 386 | * If the resource is "index.php", then it is removed (for better-looking URLs) |
383 | * | 387 | * |
@@ -387,10 +391,17 @@ function index_url($server) | |||
387 | */ | 391 | */ |
function page_url($server)
{
    // Base path of the installation: SCRIPT_NAME without a trailing "index.php".
    $scriptname = $server['SCRIPT_NAME'] ?? '';
    if (endsWith($scriptname, 'index.php')) {
        $scriptname = substr($scriptname, 0, -9);
    }

    $hasQuery = ! empty($server['QUERY_STRING']);

    // REQUEST_URI usually already carries "?<query>". When QUERY_STRING is going to be
    // appended below, drop that part first so the query does not end up in the URL twice.
    $requestUri = $server['REQUEST_URI'] ?? '';
    if ($hasQuery) {
        $queryStart = strpos($requestUri, '?');
        if ($queryStart !== false) {
            $requestUri = substr($requestUri, 0, $queryStart);
        }
    }

    // The route is the request path relative to the base path. The base path is
    // escaped with preg_quote() so that regex metacharacters in the installation
    // directory (".", "+", "@", ...) are matched literally.
    $route = preg_replace('@^' . preg_quote($scriptname, '@') . '@', '', $requestUri);

    if ($hasQuery) {
        return index_url($server) . $route . '?' . $server['QUERY_STRING'];
    }

    return index_url($server) . $route;
}
395 | 406 | ||
396 | /** | 407 | /** |
@@ -477,3 +488,109 @@ function is_https($server) | |||
477 | 488 | ||
478 | return ! empty($server['HTTPS']); | 489 | return ! empty($server['HTTPS']); |
479 | } | 490 | } |
491 | |||
/**
 * Get cURL callback function for CURLOPT_WRITEFUNCTION
 *
 * The returned closure keeps its own state between calls (redirection flag and
 * chunk counters) and writes the extracted values into the referenced arguments.
 *
 * @param string   $charset             to extract from the downloaded page (reference)
 * @param string   $title               to extract from the downloaded page (reference)
 * @param string   $description         to extract from the downloaded page (reference)
 * @param string   $keywords            to extract from the downloaded page (reference)
 * @param bool     $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
 * @param callable $curlGetInfo         Optionally overrides curl_getinfo function
 *
 * @return Closure
 */
function get_curl_download_callback(
    &$charset,
    &$title,
    &$description,
    &$keywords,
    $retrieveDescription,
    $curlGetInfo = 'curl_getinfo'
) {
    $isRedirected = false;
    $currentChunk = 0;
    $foundChunk = null;

    /**
     * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
     *
     * While downloading the remote page, we check that the HTTP code is 200 and content type is 'text/html'
     * Then we extract the title and the charset and stop the download when it's done.
     *
     * The handle is received by value: cURL passes it as a value, and a by-reference
     * parameter triggers a warning on PHP 8 when the callback is invoked.
     *
     * @param resource $ch   cURL resource
     * @param string   $data chunk of data being downloaded
     *
     * @return int|bool length of $data or false if we need to stop the download
     */
    return function ($ch, $data) use (
        $retrieveDescription,
        $curlGetInfo,
        &$charset,
        &$title,
        &$description,
        &$keywords,
        &$isRedirected,
        &$currentChunk,
        &$foundChunk
    ) {
        $currentChunk++;
        $chunkLength = strlen($data);

        $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
        // Every redirection status must let the transfer continue, otherwise the
        // download is aborted before cURL can follow the Location header.
        // The comparison is intentionally loose so that injected $curlGetInfo
        // replacements returning numeric strings keep working.
        if (!empty($responseCode) && in_array($responseCode, [301, 302, 303, 307, 308])) {
            $isRedirected = true;
            return $chunkLength;
        }
        if (!empty($responseCode) && $responseCode !== 200) {
            return false;
        }

        // After a redirection, the content type will keep the previous request value
        // until it finds the next content-type header.
        $contentType = null;
        if (! $isRedirected || stripos($data, 'content-type') !== false) {
            $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
        }
        if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
            return false;
        }

        // Prefer the charset announced by the HTTP header, fall back to the HTML content.
        if (!empty($contentType) && empty($charset)) {
            $charset = header_extract_charset($contentType);
        }
        if (empty($charset)) {
            $charset = html_extract_charset($data);
        }

        if (empty($title)) {
            $title = html_extract_title($data);
            $foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
        }
        if ($retrieveDescription && empty($description)) {
            $description = html_extract_tag('description', $data);
            $foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
        }
        if ($retrieveDescription && empty($keywords)) {
            $keywords = html_extract_tag('keywords', $data);
            if (! empty($keywords)) {
                $foundChunk = $currentChunk;
                // Keywords use the format tag1, tag2 multiple words, tag
                // So we format them to match Shaarli's separator and glue multiple words with '-'
                $keywords = implode(' ', array_map(function ($keyword) {
                    return implode('-', preg_split('/\s+/', trim($keyword)));
                }, explode(',', $keywords)));
            }
        }

        // We got everything we want, stop the download.
        // If we already found either the title, description or keywords,
        // it's highly unlikely that we'll find the other metas further than
        // in the same chunk of data or the next one. So we also stop the download after that.
        $headersResolved = !empty($responseCode) && !empty($contentType) && !empty($charset);
        if ($headersResolved && $foundChunk !== null
            && (! $retrieveDescription
                || $foundChunk < $currentChunk
                || (!empty($title) && !empty($description) && !empty($keywords))
            )
        ) {
            return false;
        }

        return $chunkLength;
    };
}