diff options
author | Kevin Decherf <kevin@kdecherf.com> | 2018-09-06 22:26:20 +0200 |
---|---|---|
committer | Kevin Decherf <kevin@kdecherf.com> | 2018-10-22 23:01:16 +0200 |
commit | e07fadea76aa7329c4b955a59e74cb867c733706 (patch) | |
tree | 8dcc67b77766c59961d1764c33976b0aa62ac448 | |
parent | 781864b9546b0ff2d6fe42ce72f78b8f40b785e9 (diff) | |
download | wallabag-e07fadea76aa7329c4b955a59e74cb867c733706.tar.gz wallabag-e07fadea76aa7329c4b955a59e74cb867c733706.tar.zst wallabag-e07fadea76aa7329c4b955a59e74cb867c733706.zip |
Refactor updateOriginUrl to include new behaviors behaviors
- Leave origin_url unchanged if difference is an ending slash
- Leave origin_url unchanged if difference is scheme
- Ignore (noop) if difference is query string or fragment
Signed-off-by: Kevin Decherf <kevin@kdecherf.com>
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 54 | ||||
-rw-r--r-- | tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | 121 |
2 files changed, 140 insertions, 35 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index da0ec5a3..007ee8bb 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -246,15 +246,7 @@ class ContentProxy | |||
246 | */ | 246 | */ |
247 | private function stockEntry(Entry $entry, array $content) | 247 | private function stockEntry(Entry $entry, array $content) |
248 | { | 248 | { |
249 | // When a redirection occurs while fetching an entry | 249 | $this->updateOriginUrl($entry, $content['url']); |
250 | // we move the original url in origin_url property if empty | ||
251 | // and set the entry url with the final value | ||
252 | if (!empty($content['url']) && $entry->getUrl() !== $content['url']) { | ||
253 | if (empty($entry->getOriginUrl())) { | ||
254 | $entry->setOriginUrl($entry->getUrl()); | ||
255 | } | ||
256 | $entry->setUrl($content['url']); | ||
257 | } | ||
258 | 250 | ||
259 | $this->setEntryDomainName($entry); | 251 | $this->setEntryDomainName($entry); |
260 | 252 | ||
@@ -321,6 +313,50 @@ class ContentProxy | |||
321 | } | 313 | } |
322 | 314 | ||
323 | /** | 315 | /** |
316 | * Update the origin_url field when a redirection occurs | ||
317 | * This field is set if it is empty and new url does not match ignore list. | ||
318 | * | ||
319 | * @param Entry $entry | ||
320 | * @param string $url | ||
321 | */ | ||
322 | private function updateOriginUrl(Entry $entry, $url) | ||
323 | { | ||
324 | if (!empty($url) && $entry->getUrl() !== $url) { | ||
325 | $parsed_entry_url = parse_url($entry->getUrl()); | ||
326 | $parsed_content_url = parse_url($url); | ||
327 | |||
328 | $diff_ec = array_diff_assoc($parsed_entry_url, $parsed_content_url); | ||
329 | $diff_ce = array_diff_assoc($parsed_content_url, $parsed_entry_url); | ||
330 | |||
331 | $diff = array_merge($diff_ec, $diff_ce); | ||
332 | $diff_keys = array_keys($diff); | ||
333 | sort($diff_keys); | ||
334 | |||
335 | switch ($diff_keys) { | ||
336 | case ['path']: | ||
337 | if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry | ||
338 | || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId | ||
339 | $entry->setUrl($url); | ||
340 | } | ||
341 | break; | ||
342 | case ['scheme']: | ||
343 | $entry->setUrl($url); | ||
344 | break; | ||
345 | case ['fragment']: | ||
346 | case ['query']: | ||
347 | // noop | ||
348 | break; | ||
349 | default: | ||
350 | if (empty($entry->getOriginUrl())) { | ||
351 | $entry->setOriginUrl($entry->getUrl()); | ||
352 | } | ||
353 | $entry->setUrl($url); | ||
354 | break; | ||
355 | } | ||
356 | } | ||
357 | } | ||
358 | |||
359 | /** | ||
324 | * Validate that the given content has at least a title, an html and a url. | 360 | * Validate that the given content has at least a title, an html and a url. |
325 | * | 361 | * |
326 | * @param array $content | 362 | * @param array $content |
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 84b38f02..c20732cc 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | |||
@@ -740,6 +740,101 @@ class ContentProxyTest extends TestCase | |||
740 | } | 740 | } |
741 | 741 | ||
742 | /** | 742 | /** |
743 | * Data provider for testWithChangedUrl. | ||
744 | * | ||
745 | * Arrays contain the following values: | ||
746 | * $entry_url | ||
747 | * $origin_url | ||
748 | * $content_url | ||
749 | * $expected_entry_url | ||
750 | * $expected_origin_url | ||
751 | * $expected_domain | ||
752 | */ | ||
753 | public function dataForChangedUrl() | ||
754 | { | ||
755 | return [ | ||
756 | 'normal' => [ | ||
757 | 'http://0.0.0.0', | ||
758 | null, | ||
759 | 'http://1.1.1.1', | ||
760 | 'http://1.1.1.1', | ||
761 | 'http://0.0.0.0', | ||
762 | '1.1.1.1', | ||
763 | ], | ||
764 | 'origin already set' => [ | ||
765 | 'http://0.0.0.0', | ||
766 | 'http://hello', | ||
767 | 'http://1.1.1.1', | ||
768 | 'http://1.1.1.1', | ||
769 | 'http://hello', | ||
770 | '1.1.1.1', | ||
771 | ], | ||
772 | 'trailing slash' => [ | ||
773 | 'https://example.com/hello-world', | ||
774 | null, | ||
775 | 'https://example.com/hello-world/', | ||
776 | 'https://example.com/hello-world/', | ||
777 | null, | ||
778 | 'example.com', | ||
779 | ], | ||
780 | 'no query string in fetched content' => [ | ||
781 | 'https://example.org/hello?world=1', | ||
782 | null, | ||
783 | 'https://example.org/hello', | ||
784 | 'https://example.org/hello?world=1', | ||
785 | null, | ||
786 | 'example.org', | ||
787 | ], | ||
788 | 'query string in fetched content' => [ | ||
789 | 'https://example.org/hello', | ||
790 | null, | ||
791 | 'https://example.org/hello?world=1', | ||
792 | 'https://example.org/hello', | ||
793 | null, | ||
794 | 'example.org', | ||
795 | ], | ||
796 | 'fragment in fetched content' => [ | ||
797 | 'https://example.org/hello', | ||
798 | null, | ||
799 | 'https://example.org/hello#world', | ||
800 | 'https://example.org/hello', | ||
801 | null, | ||
802 | 'example.org', | ||
803 | ], | ||
804 | ]; | ||
805 | } | ||
806 | |||
807 | /** | ||
808 | * @dataProvider dataForChangedUrl | ||
809 | */ | ||
810 | public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain) | ||
811 | { | ||
812 | $tagger = $this->getTaggerMock(); | ||
813 | $tagger->expects($this->once()) | ||
814 | ->method('tag'); | ||
815 | |||
816 | $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true); | ||
817 | $entry = new Entry(new User()); | ||
818 | $entry->setOriginUrl($origin_url); | ||
819 | $proxy->updateEntry( | ||
820 | $entry, | ||
821 | $entry_url, | ||
822 | [ | ||
823 | 'html' => false, | ||
824 | 'title' => '', | ||
825 | 'url' => $content_url, | ||
826 | 'content_type' => '', | ||
827 | 'language' => '', | ||
828 | ], | ||
829 | true | ||
830 | ); | ||
831 | |||
832 | $this->assertSame($expected_entry_url, $entry->getUrl()); | ||
833 | $this->assertSame($expected_domain, $entry->getDomainName()); | ||
834 | $this->assertSame($expected_origin_url, $entry->getOriginUrl()); | ||
835 | } | ||
836 | |||
837 | /** | ||
743 | * https://stackoverflow.com/a/18506801. | 838 | * https://stackoverflow.com/a/18506801. |
744 | * | 839 | * |
745 | * @param $string | 840 | * @param $string |
@@ -775,32 +870,6 @@ class ContentProxyTest extends TestCase | |||
775 | return $string; | 870 | return $string; |
776 | } | 871 | } |
777 | 872 | ||
778 | public function testWithChangedUrl() | ||
779 | { | ||
780 | $tagger = $this->getTaggerMock(); | ||
781 | $tagger->expects($this->once()) | ||
782 | ->method('tag'); | ||
783 | |||
784 | $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true); | ||
785 | $entry = new Entry(new User()); | ||
786 | $proxy->updateEntry( | ||
787 | $entry, | ||
788 | 'http://0.0.0.0', | ||
789 | [ | ||
790 | 'html' => false, | ||
791 | 'title' => '', | ||
792 | 'url' => 'http://1.1.1.1', | ||
793 | 'content_type' => '', | ||
794 | 'language' => '', | ||
795 | ], | ||
796 | true | ||
797 | ); | ||
798 | |||
799 | $this->assertSame('http://1.1.1.1', $entry->getUrl()); | ||
800 | $this->assertSame('1.1.1.1', $entry->getDomainName()); | ||
801 | $this->assertSame('http://0.0.0.0', $entry->getOriginUrl()); | ||
802 | } | ||
803 | |||
804 | private function getTaggerMock() | 873 | private function getTaggerMock() |
805 | { | 874 | { |
806 | return $this->getMockBuilder(RuleBasedTagger::class) | 875 | return $this->getMockBuilder(RuleBasedTagger::class) |