aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorKevin Decherf <kevin@kdecherf.com>2018-09-06 22:26:20 +0200
committerKevin Decherf <kevin@kdecherf.com>2018-10-22 23:01:16 +0200
commite07fadea76aa7329c4b955a59e74cb867c733706 (patch)
tree8dcc67b77766c59961d1764c33976b0aa62ac448
parent781864b9546b0ff2d6fe42ce72f78b8f40b785e9 (diff)
downloadwallabag-e07fadea76aa7329c4b955a59e74cb867c733706.tar.gz
wallabag-e07fadea76aa7329c4b955a59e74cb867c733706.tar.zst
wallabag-e07fadea76aa7329c4b955a59e74cb867c733706.zip
Refactor updateOriginUrl to include new behaviors behaviors
- Leave origin_url unchanged if difference is an ending slash - Leave origin_url unchanged if difference is scheme - Ignore (noop) if difference is query string or fragment Signed-off-by: Kevin Decherf <kevin@kdecherf.com>
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php54
-rw-r--r--tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php121
2 files changed, 140 insertions, 35 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index da0ec5a3..007ee8bb 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -246,15 +246,7 @@ class ContentProxy
246 */ 246 */
247 private function stockEntry(Entry $entry, array $content) 247 private function stockEntry(Entry $entry, array $content)
248 { 248 {
249 // When a redirection occurs while fetching an entry 249 $this->updateOriginUrl($entry, $content['url']);
250 // we move the original url in origin_url property if empty
251 // and set the entry url with the final value
252 if (!empty($content['url']) && $entry->getUrl() !== $content['url']) {
253 if (empty($entry->getOriginUrl())) {
254 $entry->setOriginUrl($entry->getUrl());
255 }
256 $entry->setUrl($content['url']);
257 }
258 250
259 $this->setEntryDomainName($entry); 251 $this->setEntryDomainName($entry);
260 252
@@ -321,6 +313,50 @@ class ContentProxy
321 } 313 }
322 314
323 /** 315 /**
316 * Update the origin_url field when a redirection occurs
317 * This field is set if it is empty and new url does not match ignore list.
318 *
319 * @param Entry $entry
320 * @param string $url
321 */
322 private function updateOriginUrl(Entry $entry, $url)
323 {
324 if (!empty($url) && $entry->getUrl() !== $url) {
325 $parsed_entry_url = parse_url($entry->getUrl());
326 $parsed_content_url = parse_url($url);
327
328 $diff_ec = array_diff_assoc($parsed_entry_url, $parsed_content_url);
329 $diff_ce = array_diff_assoc($parsed_content_url, $parsed_entry_url);
330
331 $diff = array_merge($diff_ec, $diff_ce);
332 $diff_keys = array_keys($diff);
333 sort($diff_keys);
334
335 switch ($diff_keys) {
336 case ['path']:
337 if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry
338 || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId
339 $entry->setUrl($url);
340 }
341 break;
342 case ['scheme']:
343 $entry->setUrl($url);
344 break;
345 case ['fragment']:
346 case ['query']:
347 // noop
348 break;
349 default:
350 if (empty($entry->getOriginUrl())) {
351 $entry->setOriginUrl($entry->getUrl());
352 }
353 $entry->setUrl($url);
354 break;
355 }
356 }
357 }
358
359 /**
324 * Validate that the given content has at least a title, an html and a url. 360 * Validate that the given content has at least a title, an html and a url.
325 * 361 *
326 * @param array $content 362 * @param array $content
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
index 84b38f02..c20732cc 100644
--- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
@@ -740,6 +740,101 @@ class ContentProxyTest extends TestCase
740 } 740 }
741 741
742 /** 742 /**
743 * Data provider for testWithChangedUrl.
744 *
745 * Arrays contain the following values:
746 * $entry_url
747 * $origin_url
748 * $content_url
749 * $expected_entry_url
750 * $expected_origin_url
751 * $expected_domain
752 */
753 public function dataForChangedUrl()
754 {
755 return [
756 'normal' => [
757 'http://0.0.0.0',
758 null,
759 'http://1.1.1.1',
760 'http://1.1.1.1',
761 'http://0.0.0.0',
762 '1.1.1.1',
763 ],
764 'origin already set' => [
765 'http://0.0.0.0',
766 'http://hello',
767 'http://1.1.1.1',
768 'http://1.1.1.1',
769 'http://hello',
770 '1.1.1.1',
771 ],
772 'trailing slash' => [
773 'https://example.com/hello-world',
774 null,
775 'https://example.com/hello-world/',
776 'https://example.com/hello-world/',
777 null,
778 'example.com',
779 ],
780 'no query string in fetched content' => [
781 'https://example.org/hello?world=1',
782 null,
783 'https://example.org/hello',
784 'https://example.org/hello?world=1',
785 null,
786 'example.org',
787 ],
788 'query string in fetched content' => [
789 'https://example.org/hello',
790 null,
791 'https://example.org/hello?world=1',
792 'https://example.org/hello',
793 null,
794 'example.org',
795 ],
796 'fragment in fetched content' => [
797 'https://example.org/hello',
798 null,
799 'https://example.org/hello#world',
800 'https://example.org/hello',
801 null,
802 'example.org',
803 ],
804 ];
805 }
806
807 /**
808 * @dataProvider dataForChangedUrl
809 */
810 public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
811 {
812 $tagger = $this->getTaggerMock();
813 $tagger->expects($this->once())
814 ->method('tag');
815
816 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
817 $entry = new Entry(new User());
818 $entry->setOriginUrl($origin_url);
819 $proxy->updateEntry(
820 $entry,
821 $entry_url,
822 [
823 'html' => false,
824 'title' => '',
825 'url' => $content_url,
826 'content_type' => '',
827 'language' => '',
828 ],
829 true
830 );
831
832 $this->assertSame($expected_entry_url, $entry->getUrl());
833 $this->assertSame($expected_domain, $entry->getDomainName());
834 $this->assertSame($expected_origin_url, $entry->getOriginUrl());
835 }
836
837 /**
743 * https://stackoverflow.com/a/18506801. 838 * https://stackoverflow.com/a/18506801.
744 * 839 *
745 * @param $string 840 * @param $string
@@ -775,32 +870,6 @@ class ContentProxyTest extends TestCase
775 return $string; 870 return $string;
776 } 871 }
777 872
778 public function testWithChangedUrl()
779 {
780 $tagger = $this->getTaggerMock();
781 $tagger->expects($this->once())
782 ->method('tag');
783
784 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
785 $entry = new Entry(new User());
786 $proxy->updateEntry(
787 $entry,
788 'http://0.0.0.0',
789 [
790 'html' => false,
791 'title' => '',
792 'url' => 'http://1.1.1.1',
793 'content_type' => '',
794 'language' => '',
795 ],
796 true
797 );
798
799 $this->assertSame('http://1.1.1.1', $entry->getUrl());
800 $this->assertSame('1.1.1.1', $entry->getDomainName());
801 $this->assertSame('http://0.0.0.0', $entry->getOriginUrl());
802 }
803
804 private function getTaggerMock() 873 private function getTaggerMock()
805 { 874 {
806 return $this->getMockBuilder(RuleBasedTagger::class) 875 return $this->getMockBuilder(RuleBasedTagger::class)