From: Jeremy Benoist Date: Wed, 29 May 2019 12:18:04 +0000 (+0200) Subject: Use hash given url to avoid duplicate X-Git-Url: https://git.immae.eu/?a=commitdiff_plain;ds=sidebyside;h=f3bfb875e94021a93e24a41fbc0f8d86d4dee378;p=github%2Fwallabag%2Fwallabag.git Use hash given url to avoid duplicate Using hashed url we can ensure an index on them to ensure it's fast. --- diff --git a/app/DoctrineMigrations/Version20170710125843.php b/app/DoctrineMigrations/Version20170710125843.php deleted file mode 100644 index 2cf8647a..00000000 --- a/app/DoctrineMigrations/Version20170710125843.php +++ /dev/null @@ -1,38 +0,0 @@ -getTable($this->getTable('entry')); - - $this->skipIf($entryTable->hasColumn('given_url'), 'It seems that you already played this migration.'); - - $entryTable->addColumn('given_url', 'text', [ - 'notnull' => false, - ]); - } - - /** - * @param Schema $schema - */ - public function down(Schema $schema) - { - $entryTable = $schema->getTable($this->getTable('entry')); - - $this->skipIf(!$entryTable->hasColumn('given_url'), 'It seems that you already played this migration.'); - - $entryTable->dropColumn('given_url'); - } -} diff --git a/app/DoctrineMigrations/Version20171218135243.php b/app/DoctrineMigrations/Version20171218135243.php deleted file mode 100644 index 3060c920..00000000 --- a/app/DoctrineMigrations/Version20171218135243.php +++ /dev/null @@ -1,48 +0,0 @@ -getTable($this->getTable('entry')); - $this->skipIf($entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.'); - - switch ($this->connection->getDatabasePlatform()->getName()) { - case 'sqlite': - $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url, given_url, user_id);'; - break; - case 'mysql': - $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url (255), given_url (255), user_id);'; - break; - case 'postgresql': - $sql = 'CREATE UNIQUE INDEX ' . 
$this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url, given_url, user_id);'; - break; - } - - $this->addSql($sql); - } - - /** - * @param Schema $schema - */ - public function down(Schema $schema) - { - $entryTable = $schema->getTable($this->getTable('entry')); - $this->skipIf(false === $entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.'); - - $entryTable->dropIndex($this->indexGivenUrl); - } -} diff --git a/app/DoctrineMigrations/Version20190601125843.php b/app/DoctrineMigrations/Version20190601125843.php new file mode 100644 index 00000000..341d64dc --- /dev/null +++ b/app/DoctrineMigrations/Version20190601125843.php @@ -0,0 +1,74 @@ +getTable($this->getTable('entry')); + + if (!$entryTable->hasColumn('given_url')) { + $entryTable->addColumn('given_url', 'text', [ + 'notnull' => false, + ]); + } + + if (!$entryTable->hasColumn('hashed_given_url')) { + $entryTable->addColumn('hashed_given_url', 'text', [ + 'length' => 40, + 'notnull' => false, + ]); + } + + $entryTable->dropIndex('hashed_url_user_id'); + $entryTable->addIndex( + [ + 'user_id', + 'hashed_url', + 'hashed_given_url', + ], + 'hashed_urls_user_id', + [], + [ + // specify length for index which is required by MySQL on text field + 'lengths' => [ + // user_id + null, + // hashed_url + 40, + // hashed_given_url + 40, + ], + ] + ); + } + + /** + * @param Schema $schema + */ + public function down(Schema $schema) + { + $entryTable = $schema->getTable($this->getTable('entry')); + + if ($entryTable->hasColumn('given_url')) { + $entryTable->dropColumn('given_url'); + } + + if ($entryTable->hasColumn('hashed_given_url')) { + $entryTable->dropColumn('hashed_given_url'); + } + + $entryTable->dropIndex('hashed_urls_user_id'); + $entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id', [], ['lengths' => [null, 40]]); + } +} diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 62274136..304dd1b3 
100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -27,9 +27,8 @@ use Wallabag\UserBundle\Entity\User; * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="uid", columns={"uid"}), - * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) - * }, - * uniqueConstraints={@ORM\UniqueConstraint(name="IDX_entry_given_url",columns={"url", "given_url", "user_id"})} + * @ORM\Index(name="hashed_urls_user_id", columns={"user_id", "hashed_url", "hashed_given_url"}, options={"lengths"={null, 40, 40}}) + * } * ) * @ORM\HasLifecycleCallbacks() * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") @@ -69,30 +68,52 @@ class Entry private $title; /** + * Define the url fetched by wallabag (the final url after potential redirections). + * * @var string * - * @ORM\Column(name="given_url", type="text", nullable=true) + * @Assert\NotBlank() + * @ORM\Column(name="url", type="text", nullable=true) * * @Groups({"entries_for_user", "export_all"}) */ - private $givenUrl; + private $url; /** * @var string * - * @Assert\NotBlank() - * @ORM\Column(name="url", type="text", nullable=true) + * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) + */ + private $hashedUrl; + + /** + * From where user retrieved/found the url (another article, a tweet, or the given_url if none is provided). + * + * @var string + * + * @ORM\Column(name="origin_url", type="text", nullable=true) * * @Groups({"entries_for_user", "export_all"}) */ - private $url; + private $originUrl; /** + * Define the url entered by the user (without redirections). 
+ * * @var string * - * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) + * @ORM\Column(name="given_url", type="text", nullable=true) + * + * @Groups({"entries_for_user", "export_all"}) */ - private $hashedUrl; + private $givenUrl; + + /** + * @var string + * + * @ORM\Column(name="hashed_given_url", type="string", length=40, nullable=true) + */ + private $hashedGivenUrl; /** * @var bool @@ -273,15 +294,6 @@ class Entry */ private $tags; - /** - * @var string - * - * @ORM\Column(name="origin_url", type="text", nullable=true) - * - * @Groups({"entries_for_user", "export_all"}) - */ - private $originUrl; - /* * @param User $user */ @@ -325,30 +337,6 @@ class Entry return $this->title; } - /** - * Set given url. - * - * @param string $givenUrl - * - * @return Entry - */ - public function setGivenUrl($givenUrl) - { - $this->givenUrl = $givenUrl; - - return $this; - } - - /** - * Get given Url. - * - * @return string - */ - public function getGivenUrl() - { - return $this->givenUrl; - } - /** * Set url. * @@ -956,6 +944,31 @@ class Entry return $this->originUrl; } + /** + * Set given url. + * + * @param string $givenUrl + * + * @return Entry + */ + public function setGivenUrl($givenUrl) + { + $this->givenUrl = $givenUrl; + $this->hashedGivenUrl = UrlHasher::hashUrl($givenUrl); + + return $this; + } + + /** + * Get given url. 
+ * + * @return string + */ + public function getGivenUrl() + { + return $this->givenUrl; + } + /** * @return string */ diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 0d6a412d..5901df8b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -76,9 +76,10 @@ class ContentProxy // Not sure what are the other possible cases where this property is empty if (empty($entry->getUrl()) && !empty($url)) { $entry->setUrl($url); - $entry->setGivenUrl($url); } + $entry->setGivenUrl($url); + $this->stockEntry($entry, $content); } diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 299b0b27..8b29aad2 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -368,7 +368,7 @@ class EntryRepository extends EntityRepository { $res = $this->createQueryBuilder('e') ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) - // ->orWhere('e.givenUrl = :url')->setParameter('url', $url) + ->orWhere('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl) ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) ->getQuery() ->getResult();