diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-11-12 13:02:36 +0100 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2020-11-12 13:02:36 +0100 |
commit | 1409f1c89a7ca01456ae2dcd6357d296e2b99f5a (patch) | |
tree | ffa30a9358e82d27be75d8fc5e57f3c8820dc6d3 /application/http/HttpUtils.php | |
parent | 054e03f37fa29da8066f1a637919f13c7e7dc5d2 (diff) | |
parent | a6935feb22df8d9634189ee87d257da9f03eedbd (diff) | |
download | Shaarli-v0.12.tar.gz Shaarli-v0.12.tar.zst Shaarli-v0.12.zip |
Diffstat (limited to 'application/http/HttpUtils.php')
-rw-r--r-- | application/http/HttpUtils.php | 198 |
1 files changed, 119 insertions, 79 deletions
diff --git a/application/http/HttpUtils.php b/application/http/HttpUtils.php index 9f414073..4bde1d5b 100644 --- a/application/http/HttpUtils.php +++ b/application/http/HttpUtils.php | |||
@@ -6,12 +6,14 @@ use Shaarli\Http\Url; | |||
6 | * GET an HTTP URL to retrieve its content | 6 | * GET an HTTP URL to retrieve its content |
7 | * Uses the cURL library or a fallback method | 7 | * Uses the cURL library or a fallback method |
8 | * | 8 | * |
9 | * @param string $url URL to get (http://...) | 9 | * @param string $url URL to get (http://...) |
10 | * @param int $timeout network timeout (in seconds) | 10 | * @param int $timeout network timeout (in seconds) |
11 | * @param int $maxBytes maximum downloaded bytes (default: 4 MiB) | 11 | * @param int $maxBytes maximum downloaded bytes (default: 4 MiB) |
12 | * @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION). | 12 | * @param callable|string $curlHeaderFunction Optional callback called during the download of headers |
13 | * Can be used to add download conditions on the | 13 | * (CURLOPT_HEADERFUNCTION) |
14 | * headers (response code, content type, etc.). | 14 | * @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION). |
15 | * Can be used to add download conditions on the | ||
16 | * headers (response code, content type, etc.). | ||
15 | * | 17 | * |
16 | * @return array HTTP response headers, downloaded content | 18 | * @return array HTTP response headers, downloaded content |
17 | * | 19 | * |
@@ -35,13 +37,18 @@ use Shaarli\Http\Url; | |||
35 | * @see http://stackoverflow.com/q/9183178 | 37 | * @see http://stackoverflow.com/q/9183178 |
36 | * @see http://stackoverflow.com/q/1462720 | 38 | * @see http://stackoverflow.com/q/1462720 |
37 | */ | 39 | */ |
38 | function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null) | 40 | function get_http_response( |
39 | { | 41 | $url, |
42 | $timeout = 30, | ||
43 | $maxBytes = 4194304, | ||
44 | $curlHeaderFunction = null, | ||
45 | $curlWriteFunction = null | ||
46 | ) { | ||
40 | $urlObj = new Url($url); | 47 | $urlObj = new Url($url); |
41 | $cleanUrl = $urlObj->idnToAscii(); | 48 | $cleanUrl = $urlObj->idnToAscii(); |
42 | 49 | ||
43 | if (!filter_var($cleanUrl, FILTER_VALIDATE_URL) || !$urlObj->isHttp()) { | 50 | if (!filter_var($cleanUrl, FILTER_VALIDATE_URL) || !$urlObj->isHttp()) { |
44 | return array(array(0 => 'Invalid HTTP UrlUtils'), false); | 51 | return [[0 => 'Invalid HTTP UrlUtils'], false]; |
45 | } | 52 | } |
46 | 53 | ||
47 | $userAgent = | 54 | $userAgent = |
@@ -64,42 +71,39 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF | |||
64 | 71 | ||
65 | $ch = curl_init($cleanUrl); | 72 | $ch = curl_init($cleanUrl); |
66 | if ($ch === false) { | 73 | if ($ch === false) { |
67 | return array(array(0 => 'curl_init() error'), false); | 74 | return [[0 => 'curl_init() error'], false]; |
68 | } | 75 | } |
69 | 76 | ||
70 | // General cURL settings | 77 | // General cURL settings |
71 | curl_setopt($ch, CURLOPT_AUTOREFERER, true); | 78 | curl_setopt($ch, CURLOPT_AUTOREFERER, true); |
72 | curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); | 79 | curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); |
73 | curl_setopt($ch, CURLOPT_HEADER, true); | 80 | // Default header download if the $curlHeaderFunction is not defined |
81 | curl_setopt($ch, CURLOPT_HEADER, !is_callable($curlHeaderFunction)); | ||
74 | curl_setopt( | 82 | curl_setopt( |
75 | $ch, | 83 | $ch, |
76 | CURLOPT_HTTPHEADER, | 84 | CURLOPT_HTTPHEADER, |
77 | array('Accept-Language: ' . $acceptLanguage) | 85 | ['Accept-Language: ' . $acceptLanguage] |
78 | ); | 86 | ); |
79 | curl_setopt($ch, CURLOPT_MAXREDIRS, $maxRedirs); | 87 | curl_setopt($ch, CURLOPT_MAXREDIRS, $maxRedirs); |
80 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); | 88 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); |
81 | curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); | 89 | curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); |
82 | curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); | 90 | curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); |
83 | 91 | ||
92 | // Max download size management | ||
93 | curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024 * 16); | ||
94 | curl_setopt($ch, CURLOPT_NOPROGRESS, false); | ||
95 | if (is_callable($curlHeaderFunction)) { | ||
96 | curl_setopt($ch, CURLOPT_HEADERFUNCTION, $curlHeaderFunction); | ||
97 | } | ||
84 | if (is_callable($curlWriteFunction)) { | 98 | if (is_callable($curlWriteFunction)) { |
85 | curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction); | 99 | curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction); |
86 | } | 100 | } |
87 | |||
88 | // Max download size management | ||
89 | curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16); | ||
90 | curl_setopt($ch, CURLOPT_NOPROGRESS, false); | ||
91 | curl_setopt( | 101 | curl_setopt( |
92 | $ch, | 102 | $ch, |
93 | CURLOPT_PROGRESSFUNCTION, | 103 | CURLOPT_PROGRESSFUNCTION, |
94 | function ($arg0, $arg1, $arg2, $arg3, $arg4 = 0) use ($maxBytes) { | 104 | function ($arg0, $arg1, $arg2, $arg3, $arg4) use ($maxBytes) { |
95 | if (version_compare(phpversion(), '5.5', '<')) { | 105 | $downloaded = $arg2; |
96 | // PHP version lower than 5.5 | 106 | |
97 | // Callback has 4 arguments | ||
98 | $downloaded = $arg1; | ||
99 | } else { | ||
100 | // Callback has 5 arguments | ||
101 | $downloaded = $arg2; | ||
102 | } | ||
103 | // Non-zero return stops downloading | 107 | // Non-zero return stops downloading |
104 | return ($downloaded > $maxBytes) ? 1 : 0; | 108 | return ($downloaded > $maxBytes) ? 1 : 0; |
105 | } | 109 | } |
@@ -118,9 +122,9 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF | |||
118 | * Removing this would require updating | 122 | * Removing this would require updating |
119 | * GetHttpUrlTest::testGetInvalidRemoteUrl() | 123 | * GetHttpUrlTest::testGetInvalidRemoteUrl() |
120 | */ | 124 | */ |
121 | return array(false, false); | 125 | return [false, false]; |
122 | } | 126 | } |
123 | return array(array(0 => 'curl_exec() error: ' . $errorStr), false); | 127 | return [[0 => 'curl_exec() error: ' . $errorStr], false]; |
124 | } | 128 | } |
125 | 129 | ||
126 | // Formatting output like the fallback method | 130 | // Formatting output like the fallback method |
@@ -131,7 +135,7 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF | |||
131 | $rawHeadersLastRedir = end($rawHeadersArrayRedirs); | 135 | $rawHeadersLastRedir = end($rawHeadersArrayRedirs); |
132 | 136 | ||
133 | $content = substr($response, $headSize); | 137 | $content = substr($response, $headSize); |
134 | $headers = array(); | 138 | $headers = []; |
135 | foreach (preg_split('~[\r\n]+~', $rawHeadersLastRedir) as $line) { | 139 | foreach (preg_split('~[\r\n]+~', $rawHeadersLastRedir) as $line) { |
136 | if (empty($line) || ctype_space($line)) { | 140 | if (empty($line) || ctype_space($line)) { |
137 | continue; | 141 | continue; |
@@ -142,7 +146,7 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF | |||
142 | $value = $splitLine[1]; | 146 | $value = $splitLine[1]; |
143 | if (array_key_exists($key, $headers)) { | 147 | if (array_key_exists($key, $headers)) { |
144 | if (!is_array($headers[$key])) { | 148 | if (!is_array($headers[$key])) { |
145 | $headers[$key] = array(0 => $headers[$key]); | 149 | $headers[$key] = [0 => $headers[$key]]; |
146 | } | 150 | } |
147 | $headers[$key][] = $value; | 151 | $headers[$key][] = $value; |
148 | } else { | 152 | } else { |
@@ -153,7 +157,7 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF | |||
153 | } | 157 | } |
154 | } | 158 | } |
155 | 159 | ||
156 | return array($headers, $content); | 160 | return [$headers, $content]; |
157 | } | 161 | } |
158 | 162 | ||
159 | /** | 163 | /** |
@@ -184,15 +188,15 @@ function get_http_response_fallback( | |||
184 | $acceptLanguage, | 188 | $acceptLanguage, |
185 | $maxRedr | 189 | $maxRedr |
186 | ) { | 190 | ) { |
187 | $options = array( | 191 | $options = [ |
188 | 'http' => array( | 192 | 'http' => [ |
189 | 'method' => 'GET', | 193 | 'method' => 'GET', |
190 | 'timeout' => $timeout, | 194 | 'timeout' => $timeout, |
191 | 'user_agent' => $userAgent, | 195 | 'user_agent' => $userAgent, |
192 | 'header' => "Accept: */*\r\n" | 196 | 'header' => "Accept: */*\r\n" |
193 | . 'Accept-Language: ' . $acceptLanguage | 197 | . 'Accept-Language: ' . $acceptLanguage |
194 | ) | 198 | ] |
195 | ); | 199 | ]; |
196 | 200 | ||
197 | stream_context_set_default($options); | 201 | stream_context_set_default($options); |
198 | list($headers, $finalUrl) = get_redirected_headers($cleanUrl, $maxRedr); | 202 | list($headers, $finalUrl) = get_redirected_headers($cleanUrl, $maxRedr); |
@@ -203,7 +207,7 @@ function get_http_response_fallback( | |||
203 | } | 207 | } |
204 | 208 | ||
205 | if (! $headers) { | 209 | if (! $headers) { |
206 | return array($headers, false); | 210 | return [$headers, false]; |
207 | } | 211 | } |
208 | 212 | ||
209 | try { | 213 | try { |
@@ -211,10 +215,10 @@ function get_http_response_fallback( | |||
211 | $context = stream_context_create($options); | 215 | $context = stream_context_create($options); |
212 | $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes); | 216 | $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes); |
213 | } catch (Exception $exc) { | 217 | } catch (Exception $exc) { |
214 | return array(array(0 => 'HTTP Error'), $exc->getMessage()); | 218 | return [[0 => 'HTTP Error'], $exc->getMessage()]; |
215 | } | 219 | } |
216 | 220 | ||
217 | return array($headers, $content); | 221 | return [$headers, $content]; |
218 | } | 222 | } |
219 | 223 | ||
220 | /** | 224 | /** |
@@ -233,10 +237,12 @@ function get_redirected_headers($url, $redirectionLimit = 3) | |||
233 | } | 237 | } |
234 | 238 | ||
235 | // Headers found, redirection found, and limit not reached. | 239 | // Headers found, redirection found, and limit not reached. |
236 | if ($redirectionLimit-- > 0 | 240 | if ( |
241 | $redirectionLimit-- > 0 | ||
237 | && !empty($headers) | 242 | && !empty($headers) |
238 | && (strpos($headers[0], '301') !== false || strpos($headers[0], '302') !== false) | 243 | && (strpos($headers[0], '301') !== false || strpos($headers[0], '302') !== false) |
239 | && !empty($headers['Location'])) { | 244 | && !empty($headers['Location']) |
245 | ) { | ||
240 | $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location']; | 246 | $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location']; |
241 | if ($redirection != $url) { | 247 | if ($redirection != $url) { |
242 | $redirection = getAbsoluteUrl($url, $redirection); | 248 | $redirection = getAbsoluteUrl($url, $redirection); |
@@ -244,7 +250,7 @@ function get_redirected_headers($url, $redirectionLimit = 3) | |||
244 | } | 250 | } |
245 | } | 251 | } |
246 | 252 | ||
247 | return array($headers, $url); | 253 | return [$headers, $url]; |
248 | } | 254 | } |
249 | 255 | ||
250 | /** | 256 | /** |
@@ -266,7 +272,7 @@ function getAbsoluteUrl($originalUrl, $newUrl) | |||
266 | } | 272 | } |
267 | 273 | ||
268 | $parts = parse_url($originalUrl); | 274 | $parts = parse_url($originalUrl); |
269 | $final = $parts['scheme'] .'://'. $parts['host']; | 275 | $final = $parts['scheme'] . '://' . $parts['host']; |
270 | $final .= (!empty($parts['port'])) ? $parts['port'] : ''; | 276 | $final .= (!empty($parts['port'])) ? $parts['port'] : ''; |
271 | $final .= '/'; | 277 | $final .= '/'; |
272 | if ($newUrl[0] != '/') { | 278 | if ($newUrl[0] != '/') { |
@@ -319,7 +325,8 @@ function server_url($server) | |||
319 | $scheme = 'https'; | 325 | $scheme = 'https'; |
320 | } | 326 | } |
321 | 327 | ||
322 | if (($scheme == 'http' && $port != '80') | 328 | if ( |
329 | ($scheme == 'http' && $port != '80') | ||
323 | || ($scheme == 'https' && $port != '443') | 330 | || ($scheme == 'https' && $port != '443') |
324 | ) { | 331 | ) { |
325 | $port = ':' . $port; | 332 | $port = ':' . $port; |
@@ -340,22 +347,26 @@ function server_url($server) | |||
340 | $host = $server['SERVER_NAME']; | 347 | $host = $server['SERVER_NAME']; |
341 | } | 348 | } |
342 | 349 | ||
343 | return $scheme.'://'.$host.$port; | 350 | return $scheme . '://' . $host . $port; |
344 | } | 351 | } |
345 | 352 | ||
346 | // SSL detection | 353 | // SSL detection |
347 | if ((! empty($server['HTTPS']) && strtolower($server['HTTPS']) == 'on') | 354 | if ( |
348 | || (isset($server['SERVER_PORT']) && $server['SERVER_PORT'] == '443')) { | 355 | (! empty($server['HTTPS']) && strtolower($server['HTTPS']) == 'on') |
356 | || (isset($server['SERVER_PORT']) && $server['SERVER_PORT'] == '443') | ||
357 | ) { | ||
349 | $scheme = 'https'; | 358 | $scheme = 'https'; |
350 | } | 359 | } |
351 | 360 | ||
352 | // Do not append standard port values | 361 | // Do not append standard port values |
353 | if (($scheme == 'http' && $server['SERVER_PORT'] != '80') | 362 | if ( |
354 | || ($scheme == 'https' && $server['SERVER_PORT'] != '443')) { | 363 | ($scheme == 'http' && $server['SERVER_PORT'] != '80') |
355 | $port = ':'.$server['SERVER_PORT']; | 364 | || ($scheme == 'https' && $server['SERVER_PORT'] != '443') |
365 | ) { | ||
366 | $port = ':' . $server['SERVER_PORT']; | ||
356 | } | 367 | } |
357 | 368 | ||
358 | return $scheme.'://'.$server['SERVER_NAME'].$port; | 369 | return $scheme . '://' . $server['SERVER_NAME'] . $port; |
359 | } | 370 | } |
360 | 371 | ||
361 | /** | 372 | /** |
@@ -493,6 +504,46 @@ function is_https($server) | |||
493 | * Get cURL callback function for CURLOPT_WRITEFUNCTION | 504 | * Get cURL callback function for CURLOPT_WRITEFUNCTION |
494 | * | 505 | * |
495 | * @param string $charset to extract from the downloaded page (reference) | 506 | * @param string $charset to extract from the downloaded page (reference) |
507 | * @param string $curlGetInfo Optionally overrides curl_getinfo function | ||
508 | * | ||
509 | * @return Closure | ||
510 | */ | ||
511 | function get_curl_header_callback( | ||
512 | &$charset, | ||
513 | $curlGetInfo = 'curl_getinfo' | ||
514 | ) { | ||
515 | $isRedirected = false; | ||
516 | |||
517 | return function ($ch, $data) use ($curlGetInfo, &$charset, &$isRedirected) { | ||
518 | $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); | ||
519 | $chunkLength = strlen($data); | ||
520 | if (!empty($responseCode) && in_array($responseCode, [301, 302])) { | ||
521 | $isRedirected = true; | ||
522 | return $chunkLength; | ||
523 | } | ||
524 | if (!empty($responseCode) && $responseCode !== 200) { | ||
525 | return false; | ||
526 | } | ||
527 | // After a redirection, the content type will keep the previous request value | ||
528 | // until it finds the next content-type header. | ||
529 | if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) { | ||
530 | $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE); | ||
531 | } | ||
532 | if (!empty($contentType) && strpos($contentType, 'text/html') === false) { | ||
533 | return false; | ||
534 | } | ||
535 | if (!empty($contentType) && empty($charset)) { | ||
536 | $charset = header_extract_charset($contentType); | ||
537 | } | ||
538 | |||
539 | return $chunkLength; | ||
540 | }; | ||
541 | } | ||
542 | |||
543 | /** | ||
544 | * Get cURL callback function for CURLOPT_WRITEFUNCTION | ||
545 | * | ||
546 | * @param string $charset to extract from the downloaded page (reference) | ||
496 | * @param string $title to extract from the downloaded page (reference) | 547 | * @param string $title to extract from the downloaded page (reference) |
497 | * @param string $description to extract from the downloaded page (reference) | 548 | * @param string $description to extract from the downloaded page (reference) |
498 | * @param string $keywords to extract from the downloaded page (reference) | 549 | * @param string $keywords to extract from the downloaded page (reference) |
@@ -507,9 +558,8 @@ function get_curl_download_callback( | |||
507 | &$description, | 558 | &$description, |
508 | &$keywords, | 559 | &$keywords, |
509 | $retrieveDescription, | 560 | $retrieveDescription, |
510 | $curlGetInfo = 'curl_getinfo' | 561 | $tagsSeparator |
511 | ) { | 562 | ) { |
512 | $isRedirected = false; | ||
513 | $currentChunk = 0; | 563 | $currentChunk = 0; |
514 | $foundChunk = null; | 564 | $foundChunk = null; |
515 | 565 | ||
@@ -524,37 +574,22 @@ function get_curl_download_callback( | |||
524 | * | 574 | * |
525 | * @return int|bool length of $data or false if we need to stop the download | 575 | * @return int|bool length of $data or false if we need to stop the download |
526 | */ | 576 | */ |
527 | return function (&$ch, $data) use ( | 577 | return function ( |
578 | $ch, | ||
579 | $data | ||
580 | ) use ( | ||
528 | $retrieveDescription, | 581 | $retrieveDescription, |
529 | $curlGetInfo, | 582 | $tagsSeparator, |
530 | &$charset, | 583 | &$charset, |
531 | &$title, | 584 | &$title, |
532 | &$description, | 585 | &$description, |
533 | &$keywords, | 586 | &$keywords, |
534 | &$isRedirected, | ||
535 | &$currentChunk, | 587 | &$currentChunk, |
536 | &$foundChunk | 588 | &$foundChunk |
537 | ) { | 589 | ) { |
590 | $chunkLength = strlen($data); | ||
538 | $currentChunk++; | 591 | $currentChunk++; |
539 | $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); | 592 | |
540 | if (!empty($responseCode) && in_array($responseCode, [301, 302])) { | ||
541 | $isRedirected = true; | ||
542 | return strlen($data); | ||
543 | } | ||
544 | if (!empty($responseCode) && $responseCode !== 200) { | ||
545 | return false; | ||
546 | } | ||
547 | // After a redirection, the content type will keep the previous request value | ||
548 | // until it finds the next content-type header. | ||
549 | if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) { | ||
550 | $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE); | ||
551 | } | ||
552 | if (!empty($contentType) && strpos($contentType, 'text/html') === false) { | ||
553 | return false; | ||
554 | } | ||
555 | if (!empty($contentType) && empty($charset)) { | ||
556 | $charset = header_extract_charset($contentType); | ||
557 | } | ||
558 | if (empty($charset)) { | 593 | if (empty($charset)) { |
559 | $charset = html_extract_charset($data); | 594 | $charset = html_extract_charset($data); |
560 | } | 595 | } |
@@ -562,6 +597,10 @@ function get_curl_download_callback( | |||
562 | $title = html_extract_title($data); | 597 | $title = html_extract_title($data); |
563 | $foundChunk = ! empty($title) ? $currentChunk : $foundChunk; | 598 | $foundChunk = ! empty($title) ? $currentChunk : $foundChunk; |
564 | } | 599 | } |
600 | if (empty($title)) { | ||
601 | $title = html_extract_tag('title', $data); | ||
602 | $foundChunk = ! empty($title) ? $currentChunk : $foundChunk; | ||
603 | } | ||
565 | if ($retrieveDescription && empty($description)) { | 604 | if ($retrieveDescription && empty($description)) { |
566 | $description = html_extract_tag('description', $data); | 605 | $description = html_extract_tag('description', $data); |
567 | $foundChunk = ! empty($description) ? $currentChunk : $foundChunk; | 606 | $foundChunk = ! empty($description) ? $currentChunk : $foundChunk; |
@@ -571,10 +610,10 @@ function get_curl_download_callback( | |||
571 | if (! empty($keywords)) { | 610 | if (! empty($keywords)) { |
572 | $foundChunk = $currentChunk; | 611 | $foundChunk = $currentChunk; |
573 | // Keywords use the format tag1, tag2 multiple words, tag | 612 | // Keywords use the format tag1, tag2 multiple words, tag |
574 | // So we format them to match Shaarli's separator and glue multiple words with '-' | 613 | // So we split the result with `,`, then if a tag contains the separator we replace it by `-`. |
575 | $keywords = implode(' ', array_map(function($keyword) { | 614 | $keywords = tags_array2str(array_map(function (string $keyword) use ($tagsSeparator): string { |
576 | return implode('-', preg_split('/\s+/', trim($keyword))); | 615 | return tags_array2str(tags_str2array($keyword, $tagsSeparator), '-'); |
577 | }, explode(',', $keywords))); | 616 | }, tags_str2array($keywords, ',')), $tagsSeparator); |
578 | } | 617 | } |
579 | } | 618 | } |
580 | 619 | ||
@@ -582,7 +621,8 @@ function get_curl_download_callback( | |||
582 | // If we already found either the title, description or keywords, | 621 | // If we already found either the title, description or keywords, |
583 | // it's highly unlikely that we'll found the other metas further than | 622 | // it's highly unlikely that we'll found the other metas further than |
584 | // in the same chunk of data or the next one. So we also stop the download after that. | 623 | // in the same chunk of data or the next one. So we also stop the download after that. |
585 | if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null | 624 | if ( |
625 | (!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null | ||
586 | && (! $retrieveDescription | 626 | && (! $retrieveDescription |
587 | || $foundChunk < $currentChunk | 627 | || $foundChunk < $currentChunk |
588 | || (!empty($title) && !empty($description) && !empty($keywords)) | 628 | || (!empty($title) && !empty($description) && !empty($keywords)) |
@@ -591,6 +631,6 @@ function get_curl_download_callback( | |||
591 | return false; | 631 | return false; |
592 | } | 632 | } |
593 | 633 | ||
594 | return strlen($data); | 634 | return $chunkLength; |
595 | }; | 635 | }; |
596 | } | 636 | } |