From b7fa51ae7dd5fef2d9459100c88479413ddd3fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Mon, 10 Jul 2017 21:32:25 +0200 Subject: Added given_url in entry table - Added index on entry table for given_url field - Fix tests: The previous `bit.ly` url redirected to doc.wallabag but that url doesn't exist in the fixtures. I used our own internal "redirector" to create a redirect to an url which exist in the fixtures. Also, updating current migration to use the new `WallabagMigration`. --- app/DoctrineMigrations/Version20170710125843.php | 38 +++++++++++++++++ app/DoctrineMigrations/Version20171218135243.php | 48 ++++++++++++++++++++++ src/Wallabag/CoreBundle/Entity/Entry.php | 36 +++++++++++++++- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 1 + .../CoreBundle/Repository/EntryRepository.php | 1 + .../CoreBundle/Controller/EntryControllerTest.php | 36 +++++++++++++++- 6 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 app/DoctrineMigrations/Version20170710125843.php create mode 100644 app/DoctrineMigrations/Version20171218135243.php diff --git a/app/DoctrineMigrations/Version20170710125843.php b/app/DoctrineMigrations/Version20170710125843.php new file mode 100644 index 00000000..2cf8647a --- /dev/null +++ b/app/DoctrineMigrations/Version20170710125843.php @@ -0,0 +1,38 @@ +getTable($this->getTable('entry')); + + $this->skipIf($entryTable->hasColumn('given_url'), 'It seems that you already played this migration.'); + + $entryTable->addColumn('given_url', 'text', [ + 'notnull' => false, + ]); + } + + /** + * @param Schema $schema + */ + public function down(Schema $schema) + { + $entryTable = $schema->getTable($this->getTable('entry')); + + $this->skipIf(!$entryTable->hasColumn('given_url'), 'It seems that you already played this migration.'); + + $entryTable->dropColumn('given_url'); + } +} diff --git a/app/DoctrineMigrations/Version20171218135243.php b/app/DoctrineMigrations/Version20171218135243.php new file mode 100644 index 00000000..3060c920 --- /dev/null +++ b/app/DoctrineMigrations/Version20171218135243.php @@ -0,0 +1,48 @@ +getTable($this->getTable('entry')); + $this->skipIf($entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.'); + + switch ($this->connection->getDatabasePlatform()->getName()) { + case 'sqlite': + $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url, given_url, user_id);'; + break; + case 'mysql': + $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url (255), given_url (255), user_id);'; + break; + case 'postgresql': + $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url, given_url, user_id);'; + break; + } + + $this->addSql($sql); + } + + /** + * @param Schema $schema + */ + public function down(Schema $schema) + { + $entryTable = $schema->getTable($this->getTable('entry')); + $this->skipIf(false === $entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.'); + + $entryTable->dropIndex($this->indexGivenUrl); + } +} diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 1b4367fd..62274136 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -28,7 +28,8 @@ use Wallabag\UserBundle\Entity\User; * @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="uid", columns={"uid"}), * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) - * } + * }, + * uniqueConstraints={@ORM\UniqueConstraint(name="IDX_entry_given_url",columns={"url", "given_url", "user_id"})} * ) * @ORM\HasLifecycleCallbacks() * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") @@ -67,6 +68,15 @@ class Entry */ private $title; + /** + * @var string + * + * @ORM\Column(name="given_url", type="text", nullable=true) + * + * @Groups({"entries_for_user", "export_all"}) + */ + private $givenUrl; + /** * @var string * @@ -315,6 +325,30 @@ class Entry return $this->title; } + /** + * Set given url. + * + * @param string $givenUrl + * + * @return Entry + */ + public function setGivenUrl($givenUrl) + { + $this->givenUrl = $givenUrl; + + return $this; + } + + /** + * Get given Url. + * + * @return string + */ + public function getGivenUrl() + { + return $this->givenUrl; + } + /** * Set url. * diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index c6fa0d98..0d6a412d 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -76,6 +76,7 @@ class ContentProxy // Not sure what are the other possible cases where this property is empty if (empty($entry->getUrl()) && !empty($url)) { $entry->setUrl($url); + $entry->setGivenUrl($url); } $this->stockEntry($entry, $content); diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 880e7c65..299b0b27 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -368,6 +368,7 @@ class EntryRepository extends EntityRepository { $res = $this->createQueryBuilder('e') ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) + // ->orWhere('e.givenUrl = :url')->setParameter('url', $url) ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) ->getQuery() ->getResult(); diff --git a/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php b/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php index 9dee9891..a6fd3fff 100644 --- a/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php +++ b/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php @@ -166,7 +166,6 @@ class EntryControllerTest extends WallabagCoreTestCase $this->assertSame($this->url, $content->getUrl()); $this->assertContains('Google', $content->getTitle()); $this->assertSame('fr', $content->getLanguage()); - $this->assertSame('2015-03-28 11:43:19', $content->getPublishedAt()->format('Y-m-d H:i:s')); $this->assertArrayHasKey('x-frame-options', $content->getHeaders()); $client->getContainer()->get('craue_config')->set('store_article_headers', 0); } @@ -266,6 +265,41 @@ class EntryControllerTest extends WallabagCoreTestCase $this->assertContains('/view/', $client->getResponse()->getTargetUrl()); } + public function testPostNewOkUrlExistWithRedirection() + { + $this->logInAs('admin'); + $client = $this->getClient(); + + $url = 'https://wllbg.org/test-redirect/c51c'; + + $crawler = $client->request('GET', '/new'); + + $this->assertSame(200, $client->getResponse()->getStatusCode()); + + $form = $crawler->filter('form[name=entry]')->form(); + + $data = [ + 'entry[url]' => $url, + ]; + + $client->submit($form, $data); + + $crawler = $client->request('GET', '/new'); + + $this->assertSame(200, $client->getResponse()->getStatusCode()); + + $form = $crawler->filter('form[name=entry]')->form(); + + $data = [ + 'entry[url]' => $url, + ]; + + $client->submit($form, $data); + + $this->assertSame(302, $client->getResponse()->getStatusCode()); + $this->assertContains('/view/', $client->getResponse()->getTargetUrl()); + } + /** * This test will require an internet connection. */ -- cgit v1.2.3 From f3bfb875e94021a93e24a41fbc0f8d86d4dee378 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 29 May 2019 14:18:04 +0200 Subject: Use hash given url to avoid duplicate Using hashed url we can ensure an index on them to ensure it's fast. --- app/DoctrineMigrations/Version20170710125843.php | 38 --------- app/DoctrineMigrations/Version20171218135243.php | 48 ----------- app/DoctrineMigrations/Version20190601125843.php | 74 ++++++++++++++++ src/Wallabag/CoreBundle/Entity/Entry.php | 99 ++++++++++++---------- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 3 +- .../CoreBundle/Repository/EntryRepository.php | 2 +- 6 files changed, 133 insertions(+), 131 deletions(-) delete mode 100644 app/DoctrineMigrations/Version20170710125843.php delete mode 100644 app/DoctrineMigrations/Version20171218135243.php create mode 100644 app/DoctrineMigrations/Version20190601125843.php diff --git a/app/DoctrineMigrations/Version20170710125843.php b/app/DoctrineMigrations/Version20170710125843.php deleted file mode 100644 index 2cf8647a..00000000 --- a/app/DoctrineMigrations/Version20170710125843.php +++ /dev/null @@ -1,38 +0,0 @@ -getTable($this->getTable('entry')); - - $this->skipIf($entryTable->hasColumn('given_url'), 'It seems that you already played this migration.'); - - $entryTable->addColumn('given_url', 'text', [ - 'notnull' => false, - ]); - } - - /** - * @param Schema $schema - */ - public function down(Schema $schema) - { - $entryTable = $schema->getTable($this->getTable('entry')); - - $this->skipIf(!$entryTable->hasColumn('given_url'), 'It seems that you already played this migration.'); - - $entryTable->dropColumn('given_url'); - } -} diff --git a/app/DoctrineMigrations/Version20171218135243.php b/app/DoctrineMigrations/Version20171218135243.php deleted file mode 100644 index 3060c920..00000000 --- a/app/DoctrineMigrations/Version20171218135243.php +++ /dev/null @@ -1,48 +0,0 @@ -getTable($this->getTable('entry')); - $this->skipIf($entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.'); - - switch ($this->connection->getDatabasePlatform()->getName()) { - case 'sqlite': - $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url, given_url, user_id);'; - break; - case 'mysql': - $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url (255), given_url (255), user_id);'; - break; - case 'postgresql': - $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url, given_url, user_id);'; - break; - } - - $this->addSql($sql); - } - - /** - * @param Schema $schema - */ - public function down(Schema $schema) - { - $entryTable = $schema->getTable($this->getTable('entry')); - $this->skipIf(false === $entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.'); - - $entryTable->dropIndex($this->indexGivenUrl); - } -} diff --git a/app/DoctrineMigrations/Version20190601125843.php b/app/DoctrineMigrations/Version20190601125843.php new file mode 100644 index 00000000..341d64dc --- /dev/null +++ b/app/DoctrineMigrations/Version20190601125843.php @@ -0,0 +1,74 @@ +getTable($this->getTable('entry')); + + if (!$entryTable->hasColumn('given_url')) { + $entryTable->addColumn('given_url', 'text', [ + 'notnull' => false, + ]); + } + + if (!$entryTable->hasColumn('hashed_given_url')) { + $entryTable->addColumn('hashed_given_url', 'text', [ + 'length' => 40, + 'notnull' => false, + ]); + } + + $entryTable->dropIndex('hashed_url_user_id'); + $entryTable->addIndex( + [ + 'user_id', + 'hashed_url', + 'hashed_given_url', + ], + 'hashed_urls_user_id', + [], + [ + // specify length for index which is required by MySQL on text field + 'lengths' => [ + // user_id + null, + // hashed_url + 40, + // hashed_given_url + 40, + ], + ] + ); + } + + /** + * @param Schema $schema + */ + public function down(Schema $schema) + { + $entryTable = $schema->getTable($this->getTable('entry')); + + if ($entryTable->hasColumn('given_url')) { + $entryTable->dropColumn('given_url'); + } + + if ($entryTable->hasColumn('hashed_given_url')) { + $entryTable->dropColumn('hashed_given_url'); + } + + $entryTable->dropIndex('hashed_urls_user_id'); + $entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id', [], ['lengths' => [null, 40]]); + } +} diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 62274136..304dd1b3 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -27,9 +27,8 @@ use Wallabag\UserBundle\Entity\User; * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="uid", columns={"uid"}), - * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) - * }, - * uniqueConstraints={@ORM\UniqueConstraint(name="IDX_entry_given_url",columns={"url", "given_url", "user_id"})} + * @ORM\Index(name="hashed_urls_user_id", columns={"user_id", "hashed_url", "hashed_given_url"}, options={"lengths"={null, 40, 40}}) + * } * ) * @ORM\HasLifecycleCallbacks() * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") @@ -69,30 +68,52 @@ class Entry private $title; /** + * Define the url fetched by wallabag (the final url after potential redirections). + * * @var string * - * @ORM\Column(name="given_url", type="text", nullable=true) + * @Assert\NotBlank() + * @ORM\Column(name="url", type="text", nullable=true) * * @Groups({"entries_for_user", "export_all"}) */ - private $givenUrl; + private $url; /** * @var string * - * @Assert\NotBlank() - * @ORM\Column(name="url", type="text", nullable=true) + * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) + */ + private $hashedUrl; + + /** + * From where user retrieved/found the url (an other article, a twitter, or the given_url if non are provided). + * + * @var string + * + * @ORM\Column(name="origin_url", type="text", nullable=true) * * @Groups({"entries_for_user", "export_all"}) */ - private $url; + private $originUrl; /** + * Define the url entered by the user (without redirections). + * * @var string * - * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) + * @ORM\Column(name="given_url", type="text", nullable=true) + * + * @Groups({"entries_for_user", "export_all"}) */ - private $hashedUrl; + private $givenUrl; + + /** + * @var string + * + * @ORM\Column(name="hashed_given_url", type="string", length=40, nullable=true) + */ + private $hashedGivenUrl; /** * @var bool @@ -273,15 +294,6 @@ class Entry */ private $tags; - /** - * @var string - * - * @ORM\Column(name="origin_url", type="text", nullable=true) - * - * @Groups({"entries_for_user", "export_all"}) - */ - private $originUrl; - /* * @param User $user */ @@ -325,30 +337,6 @@ class Entry return $this->title; } - /** - * Set given url. - * - * @param string $givenUrl - * - * @return Entry - */ - public function setGivenUrl($givenUrl) - { - $this->givenUrl = $givenUrl; - - return $this; - } - - /** - * Get given Url. - * - * @return string - */ - public function getGivenUrl() - { - return $this->givenUrl; - } - /** * Set url. * @@ -956,6 +944,31 @@ class Entry return $this->originUrl; } + /** + * Set origin url. + * + * @param string $givenUrl + * + * @return Entry + */ + public function setGivenUrl($givenUrl) + { + $this->givenUrl = $givenUrl; + $this->hashedGivenUrl = UrlHasher::hashUrl($givenUrl); + + return $this; + } + + /** + * Get origin url. + * + * @return string + */ + public function getGivenUrl() + { + return $this->givenUrl; + } + /** * @return string */ diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 0d6a412d..5901df8b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -76,9 +76,10 @@ class ContentProxy // Not sure what are the other possible cases where this property is empty if (empty($entry->getUrl()) && !empty($url)) { $entry->setUrl($url); - $entry->setGivenUrl($url); } + $entry->setGivenUrl($url); + $this->stockEntry($entry, $content); } diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 299b0b27..8b29aad2 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -368,7 +368,7 @@ class EntryRepository extends EntityRepository { $res = $this->createQueryBuilder('e') ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) - // ->orWhere('e.givenUrl = :url')->setParameter('url', $url) + ->orWhere('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl) ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) ->getQuery() ->getResult(); -- cgit v1.2.3 From 70df4c335965a9562cc24d3ccea0a6ed1a23b7b1 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 5 Jun 2019 10:51:06 +0200 Subject: Use two indexes instead of one for hashed urls When using `OR` in a where clause, a composite index can't be used. We should use a `UNION` to take advantages of it. Instead, create 2 indexes on each hashed urls and make 2 queries to find an url. It'll be faster than the previous solution. --- app/DoctrineMigrations/Version20190601125843.php | 26 +++------------------- src/Wallabag/CoreBundle/Entity/Entry.php | 3 ++- .../CoreBundle/Repository/EntryRepository.php | 13 ++++++++++- .../CoreBundle/Controller/EntryControllerTest.php | 3 +++ 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/app/DoctrineMigrations/Version20190601125843.php b/app/DoctrineMigrations/Version20190601125843.php index 341d64dc..0e97606e 100644 --- a/app/DoctrineMigrations/Version20190601125843.php +++ b/app/DoctrineMigrations/Version20190601125843.php @@ -30,27 +30,8 @@ class Version20190601125843 extends WallabagMigration ]); } - $entryTable->dropIndex('hashed_url_user_id'); - $entryTable->addIndex( - [ - 'user_id', - 'hashed_url', - 'hashed_given_url', - ], - 'hashed_urls_user_id', - [], - [ - // specify length for index which is required by MySQL on text field - 'lengths' => [ - // user_id - null, - // hashed_url - 40, - // hashed_given_url - 40, - ], - ] - ); + // 40 = length of sha1 field hashed_given_url + $entryTable->addIndex(['user_id', 'hashed_given_url'], 'hashed_given_url_user_id', [], ['lengths' => [null, 40]]); } /** @@ -68,7 +49,6 @@ class Version20190601125843 extends WallabagMigration $entryTable->dropColumn('hashed_given_url'); } - $entryTable->dropIndex('hashed_urls_user_id'); - $entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id', [], ['lengths' => [null, 40]]); + $entryTable->dropIndex('hashed_given_url_user_id'); } } diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 304dd1b3..19f81c0f 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -27,7 +27,8 @@ use Wallabag\UserBundle\Entity\User; * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="uid", columns={"uid"}), - * @ORM\Index(name="hashed_urls_user_id", columns={"user_id", "hashed_url", "hashed_given_url"}, options={"lengths"={null, 40, 40}}) + * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}), + * @ORM\Index(name="hashed_given_url_user_id", columns={"user_id", "hashed_given_url"}, options={"lengths"={null, 40}}) * } * ) * @ORM\HasLifecycleCallbacks() diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 8b29aad2..7772e0b7 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -366,9 +366,20 @@ class EntryRepository extends EntityRepository */ public function findByHashedUrlAndUserId($hashedUrl, $userId) { + // try first using hashed_url (to use the database index) $res = $this->createQueryBuilder('e') ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) - ->orWhere('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl) + ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) + ->getQuery() + ->getResult(); + + if (\count($res)) { + return current($res); + } + + // then try using hashed_given_url (to use the database index) + $res = $this->createQueryBuilder('e') + ->where('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl) ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) ->getQuery() ->getResult(); diff --git a/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php b/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php index a6fd3fff..e9c12c49 100644 --- a/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php +++ b/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php @@ -265,6 +265,9 @@ class EntryControllerTest extends WallabagCoreTestCase $this->assertContains('/view/', $client->getResponse()->getTargetUrl()); } + /** + * This test will require an internet connection. + */ public function testPostNewOkUrlExistWithRedirection() { $this->logInAs('admin'); -- cgit v1.2.3 From d8809f70ea3a2f88635827b37af23b2fc2a93db6 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 5 Jun 2019 10:54:43 +0200 Subject: Typos --- src/Wallabag/CoreBundle/Entity/Entry.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 19f81c0f..4a9cb8d8 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -946,7 +946,7 @@ class Entry } /** - * Set origin url. + * Set given url. * * @param string $givenUrl * @@ -961,7 +961,7 @@ class Entry } /** - * Get origin url. + * Get given url. * * @return string */ -- cgit v1.2.3