git.immae.eu Git - github/shaarli/Shaarli.git/commitdiff
Improve metadata retrieval (performances and accuracy) 1567/head
author ArthurHoaro <arthur@hoa.ro>
Thu, 15 Oct 2020 09:20:33 +0000 (11:20 +0200)
committer ArthurHoaro <arthur@hoa.ro>
Thu, 15 Oct 2020 09:36:56 +0000 (11:36 +0200)
  - Use a dedicated function to download headers, to avoid applying multiple regexps on headers
  - Also try to extract title from meta tags

application/http/HttpAccess.php
application/http/HttpUtils.php
application/http/MetadataRetriever.php
tests/bookmark/LinkUtilsTest.php
tests/http/MetadataRetrieverTest.php

index 81d9e0762862f5265c65de6058b6fa911a4d7033..646a526404c550fd6fdaacab6ac298c86ec6a585 100644 (file)
@@ -14,9 +14,14 @@ namespace Shaarli\Http;
  */
 class HttpAccess
 {
-    public function getHttpResponse($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null)
-    {
-        return get_http_response($url, $timeout, $maxBytes, $curlWriteFunction);
+    public function getHttpResponse(
+        $url,
+        $timeout = 30,
+        $maxBytes = 4194304,
+        $curlHeaderFunction = null,
+        $curlWriteFunction = null
+    ) {
+        return get_http_response($url, $timeout, $maxBytes, $curlHeaderFunction, $curlWriteFunction);
     }
 
     public function getCurlDownloadCallback(
@@ -24,16 +29,19 @@ class HttpAccess
         &$title,
         &$description,
         &$keywords,
-        $retrieveDescription,
-        $curlGetInfo = 'curl_getinfo'
+        $retrieveDescription
     ) {
         return get_curl_download_callback(
             $charset,
             $title,
             $description,
             $keywords,
-            $retrieveDescription,
-            $curlGetInfo
+            $retrieveDescription
         );
     }
+
+    public function getCurlHeaderCallback(&$charset, $curlGetInfo = 'curl_getinfo')
+    {
+        return get_curl_header_callback($charset, $curlGetInfo);
+    }
 }
index 9f4140735a695c4ab8e08b2c10a8e49eaa3527bb..28c129696b45b303c2cb21d29227f600551540d4 100644 (file)
@@ -6,12 +6,14 @@ use Shaarli\Http\Url;
  * GET an HTTP URL to retrieve its content
  * Uses the cURL library or a fallback method
  *
- * @param string          $url               URL to get (http://...)
- * @param int             $timeout           network timeout (in seconds)
- * @param int             $maxBytes          maximum downloaded bytes (default: 4 MiB)
- * @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION).
- *                                           Can be used to add download conditions on the
- *                                           headers (response code, content type, etc.).
+ * @param string          $url                URL to get (http://...)
+ * @param int             $timeout            network timeout (in seconds)
+ * @param int             $maxBytes           maximum downloaded bytes (default: 4 MiB)
+ * @param callable|string $curlHeaderFunction Optional callback called during the download of headers
+ *                                            (CURLOPT_HEADERFUNCTION)
+ * @param callable|string $curlWriteFunction  Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION).
+ *                                            Can be used to add download conditions on the
+ *                                            headers (response code, content type, etc.).
  *
  * @return array HTTP response headers, downloaded content
  *
@@ -35,8 +37,13 @@ use Shaarli\Http\Url;
  * @see http://stackoverflow.com/q/9183178
  * @see http://stackoverflow.com/q/1462720
  */
-function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null)
-{
+function get_http_response(
+    $url,
+    $timeout = 30,
+    $maxBytes = 4194304,
+    $curlHeaderFunction = null,
+    $curlWriteFunction = null
+) {
     $urlObj = new Url($url);
     $cleanUrl = $urlObj->idnToAscii();
 
@@ -70,7 +77,8 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF
     // General cURL settings
     curl_setopt($ch, CURLOPT_AUTOREFERER, true);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
-    curl_setopt($ch, CURLOPT_HEADER, true);
+    // Default header download if the $curlHeaderFunction is not defined
+    curl_setopt($ch, CURLOPT_HEADER, !is_callable($curlHeaderFunction));
     curl_setopt(
         $ch,
         CURLOPT_HTTPHEADER,
@@ -81,25 +89,21 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF
     curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
     curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
 
-    if (is_callable($curlWriteFunction)) {
-        curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction);
-    }
-
     // Max download size management
     curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16);
     curl_setopt($ch, CURLOPT_NOPROGRESS, false);
+    if (is_callable($curlHeaderFunction)) {
+        curl_setopt($ch, CURLOPT_HEADERFUNCTION, $curlHeaderFunction);
+    }
+    if (is_callable($curlWriteFunction)) {
+        curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction);
+    }
     curl_setopt(
         $ch,
         CURLOPT_PROGRESSFUNCTION,
-        function ($arg0, $arg1, $arg2, $arg3, $arg4 = 0) use ($maxBytes) {
-            if (version_compare(phpversion(), '5.5', '<')) {
-                // PHP version lower than 5.5
-                // Callback has 4 arguments
-                $downloaded = $arg1;
-            } else {
-                // Callback has 5 arguments
-                $downloaded = $arg2;
-            }
+        function ($arg0, $arg1, $arg2, $arg3, $arg4) use ($maxBytes) {
+            $downloaded = $arg2;
+
             // Non-zero return stops downloading
             return ($downloaded > $maxBytes) ? 1 : 0;
         }
@@ -489,6 +493,46 @@ function is_https($server)
     return ! empty($server['HTTPS']);
 }
 
+/**
+ * Get cURL callback function for CURLOPT_HEADERFUNCTION
+ *
+ * @param string $charset     to extract from the response Content-Type header (reference)
+ * @param string $curlGetInfo Optionally overrides curl_getinfo function
+ *
+ * @return Closure
+ */
+function get_curl_header_callback(
+    &$charset,
+    $curlGetInfo = 'curl_getinfo'
+) {
+    $isRedirected = false;
+
+    return function ($ch, $data) use ($curlGetInfo, &$charset, &$isRedirected) {
+        $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
+        $chunkLength = strlen($data);
+        if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
+            $isRedirected = true;
+            return $chunkLength;
+        }
+        if (!empty($responseCode) && $responseCode !== 200) {
+            return false;
+        }
+        // After a redirection, the content type will keep the previous request value
+        // until it finds the next content-type header.
+        if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
+            $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
+        }
+        if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
+            return false;
+        }
+        if (!empty($contentType) && empty($charset)) {
+            $charset = header_extract_charset($contentType);
+        }
+
+        return $chunkLength;
+    };
+}
+
 /**
  * Get cURL callback function for CURLOPT_WRITEFUNCTION
  *
@@ -506,10 +550,8 @@ function get_curl_download_callback(
     &$title,
     &$description,
     &$keywords,
-    $retrieveDescription,
-    $curlGetInfo = 'curl_getinfo'
+    $retrieveDescription
 ) {
-    $isRedirected = false;
     $currentChunk = 0;
     $foundChunk = null;
 
@@ -524,37 +566,18 @@ function get_curl_download_callback(
      *
      * @return int|bool length of $data or false if we need to stop the download
      */
-    return function (&$ch, $data) use (
+    return function ($ch, $data) use (
         $retrieveDescription,
-        $curlGetInfo,
         &$charset,
         &$title,
         &$description,
         &$keywords,
-        &$isRedirected,
         &$currentChunk,
         &$foundChunk
     ) {
+        $chunkLength = strlen($data);
         $currentChunk++;
-        $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
-        if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
-            $isRedirected = true;
-            return strlen($data);
-        }
-        if (!empty($responseCode) && $responseCode !== 200) {
-            return false;
-        }
-        // After a redirection, the content type will keep the previous request value
-        // until it finds the next content-type header.
-        if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
-            $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
-        }
-        if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
-            return false;
-        }
-        if (!empty($contentType) && empty($charset)) {
-            $charset = header_extract_charset($contentType);
-        }
+
         if (empty($charset)) {
             $charset = html_extract_charset($data);
         }
@@ -562,6 +585,10 @@ function get_curl_download_callback(
             $title = html_extract_title($data);
             $foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
         }
+        if (empty($title)) {
+            $title = html_extract_tag('title', $data);
+            $foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
+        }
         if ($retrieveDescription && empty($description)) {
             $description = html_extract_tag('description', $data);
             $foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
@@ -591,6 +618,6 @@ function get_curl_download_callback(
             return false;
         }
 
-        return strlen($data);
+        return $chunkLength;
     };
 }
index 2ca982e21172236fb7aaaf957f0db8c255725b32..ba9bd40ce283d4b789f194d13ac7e02883361c6f 100644 (file)
@@ -46,6 +46,7 @@ class MetadataRetriever
             $url,
             $this->conf->get('general.download_timeout', 30),
             $this->conf->get('general.download_max_size', 4194304),
+            $this->httpAccess->getCurlHeaderCallback($charset),
             $this->httpAccess->getCurlDownloadCallback(
                 $charset,
                 $title,
index 29941c8cd0ed32307faa0eea5bde9b99b3d77967..3321242fae07f018c91b98b2f40067fa2d6a9e22 100644 (file)
@@ -215,61 +215,92 @@ class LinkUtilsTest extends TestCase
         $this->assertFalse(html_extract_tag('description', $html));
     }
 
+    /**
+     * Test the header callback with valid value
+     */
+    public function testCurlHeaderCallbackOk(): void
+    {
+        $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ok');
+        $data = [
+            'HTTP/1.1 200 OK',
+            'Server: GitHub.com',
+            'Date: Sat, 28 Oct 2017 12:01:33 GMT',
+            'Content-Type: text/html; charset=utf-8',
+            'Status: 200 OK',
+        ];
+
+        foreach ($data as $chunk) {
+            static::assertIsInt($callback(null, $chunk));
+        }
+
+        static::assertSame('utf-8', $charset);
+    }
+
     /**
      * Test the download callback with valid value
      */
-    public function testCurlDownloadCallbackOk()
+    public function testCurlDownloadCallbackOk(): void
     {
+        $charset = 'utf-8';
         $callback = get_curl_download_callback(
             $charset,
             $title,
             $desc,
             $keywords,
-            false,
-            'ut_curl_getinfo_ok'
+            false
         );
+
         $data = [
-            'HTTP/1.1 200 OK',
-            'Server: GitHub.com',
-            'Date: Sat, 28 Oct 2017 12:01:33 GMT',
-            'Content-Type: text/html; charset=utf-8',
-            'Status: 200 OK',
-            'end' => 'th=device-width">'
+            'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
             '<title>ignored</title>'
                 . '<meta name="description" content="desc" />'
                 . '<meta name="keywords" content="key1,key2" />',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $expected = $key !== 'end' ? strlen($line) : false;
-            $this->assertEquals($expected, $callback($ignore, $line));
-            if ($expected === false) {
-                break;
-            }
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
-        $this->assertEquals('utf-8', $charset);
-        $this->assertEquals('Refactoring · GitHub', $title);
-        $this->assertEmpty($desc);
-        $this->assertEmpty($keywords);
+
+        static::assertSame('utf-8', $charset);
+        static::assertSame('Refactoring · GitHub', $title);
+        static::assertEmpty($desc);
+        static::assertEmpty($keywords);
+    }
+
+    /**
+     * Test the header callback with valid value and no charset
+     */
+    public function testCurlHeaderCallbackNoCharset(): void
+    {
+        $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_no_charset');
+        $data = [
+            'HTTP/1.1 200 OK',
+        ];
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
+        }
+
+        static::assertFalse($charset);
     }
 
     /**
      * Test the download callback with valid values and no charset
      */
-    public function testCurlDownloadCallbackOkNoCharset()
+    public function testCurlDownloadCallbackOkNoCharset(): void
     {
+        $charset = null;
         $callback = get_curl_download_callback(
             $charset,
             $title,
             $desc,
             $keywords,
-            false,
-            'ut_curl_getinfo_no_charset'
+            false
         );
+
         $data = [
-            'HTTP/1.1 200 OK',
             'end' => 'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
@@ -277,10 +308,11 @@ class LinkUtilsTest extends TestCase
             . '<meta name="description" content="desc" />'
             . '<meta name="keywords" content="key1,key2" />',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $this->assertEquals(strlen($line), $callback($ignore, $line));
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
+
         $this->assertEmpty($charset);
         $this->assertEquals('Refactoring · GitHub', $title);
         $this->assertEmpty($desc);
@@ -290,18 +322,18 @@ class LinkUtilsTest extends TestCase
     /**
      * Test the download callback with valid values and no charset
      */
-    public function testCurlDownloadCallbackOkHtmlCharset()
+    public function testCurlDownloadCallbackOkHtmlCharset(): void
     {
+        $charset = null;
         $callback = get_curl_download_callback(
             $charset,
             $title,
             $desc,
             $keywords,
-            false,
-            'ut_curl_getinfo_no_charset'
+            false
         );
+
         $data = [
-            'HTTP/1.1 200 OK',
             '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
             'end' => 'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
@@ -310,14 +342,10 @@ class LinkUtilsTest extends TestCase
             . '<meta name="description" content="desc" />'
             . '<meta name="keywords" content="key1,key2" />',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $expected = $key !== 'end' ? strlen($line) : false;
-            $this->assertEquals($expected, $callback($ignore, $line));
-            if ($expected === false) {
-                break;
-            }
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
+
         $this->assertEquals('utf-8', $charset);
         $this->assertEquals('Refactoring · GitHub', $title);
         $this->assertEmpty($desc);
@@ -327,25 +355,26 @@ class LinkUtilsTest extends TestCase
     /**
      * Test the download callback with valid values and no title
      */
-    public function testCurlDownloadCallbackOkNoTitle()
+    public function testCurlDownloadCallbackOkNoTitle(): void
     {
+        $charset = 'utf-8';
         $callback = get_curl_download_callback(
             $charset,
             $title,
             $desc,
             $keywords,
-            false,
-            'ut_curl_getinfo_ok'
+            false
         );
+
         $data = [
-            'HTTP/1.1 200 OK',
             'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
             'ignored',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $this->assertEquals(strlen($line), $callback($ignore, $line));
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
+
         $this->assertEquals('utf-8', $charset);
         $this->assertEmpty($title);
         $this->assertEmpty($desc);
@@ -353,81 +382,55 @@ class LinkUtilsTest extends TestCase
     }
 
     /**
-     * Test the download callback with an invalid content type.
+     * Test the header callback with an invalid content type.
      */
-    public function testCurlDownloadCallbackInvalidContentType()
+    public function testCurlHeaderCallbackInvalidContentType(): void
     {
-        $callback = get_curl_download_callback(
-            $charset,
-            $title,
-            $desc,
-            $keywords,
-            false,
-            'ut_curl_getinfo_ct_ko'
-        );
-        $ignore = null;
-        $this->assertFalse($callback($ignore, ''));
-        $this->assertEmpty($charset);
-        $this->assertEmpty($title);
+        $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ct_ko');
+        $data = [
+            'HTTP/1.1 200 OK',
+        ];
+
+        static::assertFalse($callback(null, $data[0]));
+        static::assertNull($charset);
     }
 
     /**
-     * Test the download callback with an invalid response code.
+     * Test the header callback with an invalid response code.
      */
-    public function testCurlDownloadCallbackInvalidResponseCode()
+    public function testCurlHeaderCallbackInvalidResponseCode(): void
     {
-        $callback = $callback = get_curl_download_callback(
-            $charset,
-            $title,
-            $desc,
-            $keywords,
-            false,
-            'ut_curl_getinfo_rc_ko'
-        );
-        $ignore = null;
-        $this->assertFalse($callback($ignore, ''));
-        $this->assertEmpty($charset);
-        $this->assertEmpty($title);
+        $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rc_ko');
+
+        static::assertFalse($callback(null, ''));
+        static::assertNull($charset);
     }
 
     /**
-     * Test the download callback with an invalid content type and response code.
+     * Test the header callback with an invalid content type and response code.
      */
-    public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode()
+    public function testCurlHeaderCallbackInvalidContentTypeAndResponseCode(): void
     {
-        $callback = $callback = get_curl_download_callback(
-            $charset,
-            $title,
-            $desc,
-            $keywords,
-            false,
-            'ut_curl_getinfo_rs_ct_ko'
-        );
-        $ignore = null;
-        $this->assertFalse($callback($ignore, ''));
-        $this->assertEmpty($charset);
-        $this->assertEmpty($title);
+        $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rs_ct_ko');
+
+        static::assertFalse($callback(null, ''));
+        static::assertNull($charset);
     }
 
     /**
      * Test the download callback with valid value, and retrieve_description option enabled.
      */
-    public function testCurlDownloadCallbackOkWithDesc()
+    public function testCurlDownloadCallbackOkWithDesc(): void
     {
+        $charset = 'utf-8';
         $callback = get_curl_download_callback(
             $charset,
             $title,
             $desc,
             $keywords,
-            true,
-            'ut_curl_getinfo_ok'
+            true
         );
         $data = [
-            'HTTP/1.1 200 OK',
-            'Server: GitHub.com',
-            'Date: Sat, 28 Oct 2017 12:01:33 GMT',
-            'Content-Type: text/html; charset=utf-8',
-            'Status: 200 OK',
             'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
@@ -435,14 +438,11 @@ class LinkUtilsTest extends TestCase
             . '<meta name="description" content="link desc" />'
             . '<meta name="keywords" content="key1,key2" />',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $expected = $key !== 'end' ? strlen($line) : false;
-            $this->assertEquals($expected, $callback($ignore, $line));
-            if ($expected === false) {
-                break;
-            }
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
+
         $this->assertEquals('utf-8', $charset);
         $this->assertEquals('Refactoring · GitHub', $title);
         $this->assertEquals('link desc', $desc);
@@ -453,8 +453,9 @@ class LinkUtilsTest extends TestCase
      * Test the download callback with valid value, and retrieve_description option enabled,
      * but no desc or keyword defined in the page.
      */
-    public function testCurlDownloadCallbackOkWithDescNotFound()
+    public function testCurlDownloadCallbackOkWithDescNotFound(): void
     {
+        $charset = 'utf-8';
         $callback = get_curl_download_callback(
             $charset,
             $title,
@@ -464,24 +465,16 @@ class LinkUtilsTest extends TestCase
             'ut_curl_getinfo_ok'
         );
         $data = [
-            'HTTP/1.1 200 OK',
-            'Server: GitHub.com',
-            'Date: Sat, 28 Oct 2017 12:01:33 GMT',
-            'Content-Type: text/html; charset=utf-8',
-            'Status: 200 OK',
             'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
             'end' => '<title>ignored</title>',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $expected = $key !== 'end' ? strlen($line) : false;
-            $this->assertEquals($expected, $callback($ignore, $line));
-            if ($expected === false) {
-                break;
-            }
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
+
         $this->assertEquals('utf-8', $charset);
         $this->assertEquals('Refactoring · GitHub', $title);
         $this->assertEmpty($desc);
index 2a1838e81cb1ad59e5f7b62a8326127219469cc2..3c9eaa0e706a40803bee68a0a952028bd7c44e1c 100644 (file)
@@ -38,6 +38,7 @@ class MetadataRetrieverTest extends TestCase
         $remoteTitle = 'Remote Title ';
         $remoteDesc = 'Sometimes the meta description is relevant.';
         $remoteTags = 'abc def';
+        $remoteCharset = 'utf-8';
 
         $expectedResult = [
             'title' => $remoteTitle,
@@ -45,11 +46,28 @@ class MetadataRetrieverTest extends TestCase
             'tags' => $remoteTags,
         ];
 
+        $this->httpAccess
+            ->expects(static::once())
+            ->method('getCurlHeaderCallback')
+            ->willReturnCallback(
+                function (&$charset) use (
+                    $remoteCharset
+                ): callable {
+                    return function () use (
+                        &$charset,
+                        $remoteCharset
+                    ): void {
+                        $charset = $remoteCharset;
+                    };
+                }
+            )
+        ;
         $this->httpAccess
             ->expects(static::once())
             ->method('getCurlDownloadCallback')
             ->willReturnCallback(
                 function (&$charset, &$title, &$description, &$tags) use (
+                    $remoteCharset,
                     $remoteTitle,
                     $remoteDesc,
                     $remoteTags
@@ -59,11 +77,13 @@ class MetadataRetrieverTest extends TestCase
                         &$title,
                         &$description,
                         &$tags,
+                        $remoteCharset,
                         $remoteTitle,
                         $remoteDesc,
                         $remoteTags
                     ): void {
-                        $charset = 'ISO-8859-1';
+                        static::assertSame($remoteCharset, $charset);
+
                         $title = $remoteTitle;
                         $description = $remoteDesc;
                         $tags = $remoteTags;
@@ -75,8 +95,9 @@ class MetadataRetrieverTest extends TestCase
             ->expects(static::once())
             ->method('getHttpResponse')
             ->with($url, 30, 4194304)
-            ->willReturnCallback(function($url, $timeout, $maxBytes, $callback): void {
-                $callback();
+            ->willReturnCallback(function($url, $timeout, $maxBytes, $headerCallback, $dlCallback): void {
+                $headerCallback();
+                $dlCallback();
             })
         ;
 
@@ -102,8 +123,17 @@ class MetadataRetrieverTest extends TestCase
             ->expects(static::once())
             ->method('getCurlDownloadCallback')
             ->willReturnCallback(
-                function (&$charset, &$title, &$description, &$tags): callable {
-                    return function () use (&$charset, &$title, &$description, &$tags): void {};
+                function (): callable {
+                    return function (): void {};
+                }
+            )
+        ;
+        $this->httpAccess
+            ->expects(static::once())
+            ->method('getCurlHeaderCallback')
+            ->willReturnCallback(
+                function (): callable {
+                    return function (): void {};
                 }
             )
         ;
@@ -111,8 +141,9 @@ class MetadataRetrieverTest extends TestCase
             ->expects(static::once())
             ->method('getHttpResponse')
             ->with($url, 30, 4194304)
-            ->willReturnCallback(function($url, $timeout, $maxBytes, $callback): void {
-                $callback();
+            ->willReturnCallback(function($url, $timeout, $maxBytes, $headerCallback, $dlCallback): void {
+                $headerCallback();
+                $dlCallback();
             })
         ;