From b7fa51ae7dd5fef2d9459100c88479413ddd3fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Mon, 10 Jul 2017 21:32:25 +0200 Subject: Added given_url in entry table - Added index on entry table for given_url field - Fix tests: The previous `bit.ly` url redirected to doc.wallabag but that url doesn't exist in the fixtures. I used our own internal "redirector" to create a redirect to an url which exist in the fixtures. Also, updating current migration to use the new `WallabagMigration`. --- src/Wallabag/CoreBundle/Entity/Entry.php | 36 +++++++++++++++++++++- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 1 + .../CoreBundle/Repository/EntryRepository.php | 1 + 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 1b4367fd..62274136 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -28,7 +28,8 @@ use Wallabag\UserBundle\Entity\User; * @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="uid", columns={"uid"}), * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) - * } + * }, + * uniqueConstraints={@ORM\UniqueConstraint(name="IDX_entry_given_url",columns={"url", "given_url", "user_id"})} * ) * @ORM\HasLifecycleCallbacks() * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") @@ -67,6 +68,15 @@ class Entry */ private $title; + /** + * @var string + * + * @ORM\Column(name="given_url", type="text", nullable=true) + * + * @Groups({"entries_for_user", "export_all"}) + */ + private $givenUrl; + /** * @var string * @@ -315,6 +325,30 @@ class Entry return $this->title; } + /** + * Set given url. + * + * @param string $givenUrl + * + * @return Entry + */ + public function setGivenUrl($givenUrl) + { + $this->givenUrl = $givenUrl; + + return $this; + } + + /** + * Get given Url. + * + * @return string + */ + public function getGivenUrl() + { + return $this->givenUrl; + } + /** * Set url. * diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index c6fa0d98..0d6a412d 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -76,6 +76,7 @@ class ContentProxy // Not sure what are the other possible cases where this property is empty if (empty($entry->getUrl()) && !empty($url)) { $entry->setUrl($url); + $entry->setGivenUrl($url); } $this->stockEntry($entry, $content); diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 880e7c65..299b0b27 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -368,6 +368,7 @@ class EntryRepository extends EntityRepository { $res = $this->createQueryBuilder('e') ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) + // ->orWhere('e.givenUrl = :url')->setParameter('url', $url) ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) ->getQuery() ->getResult(); -- cgit v1.2.3 From f3bfb875e94021a93e24a41fbc0f8d86d4dee378 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 29 May 2019 14:18:04 +0200 Subject: Use hash given url to avoid duplicate Using hashed url we can ensure an index on them to ensure it's fast. --- src/Wallabag/CoreBundle/Entity/Entry.php | 99 ++++++++++++---------- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 3 +- .../CoreBundle/Repository/EntryRepository.php | 2 +- 3 files changed, 59 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 62274136..304dd1b3 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -27,9 +27,8 @@ use Wallabag\UserBundle\Entity\User; * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="uid", columns={"uid"}), - * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) - * }, - * uniqueConstraints={@ORM\UniqueConstraint(name="IDX_entry_given_url",columns={"url", "given_url", "user_id"})} + * @ORM\Index(name="hashed_urls_user_id", columns={"user_id", "hashed_url", "hashed_given_url"}, options={"lengths"={null, 40, 40}}) + * } * ) * @ORM\HasLifecycleCallbacks() * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") @@ -69,30 +68,52 @@ class Entry private $title; /** + * Define the url fetched by wallabag (the final url after potential redirections). + * * @var string * - * @ORM\Column(name="given_url", type="text", nullable=true) + * @Assert\NotBlank() + * @ORM\Column(name="url", type="text", nullable=true) * * @Groups({"entries_for_user", "export_all"}) */ - private $givenUrl; + private $url; /** * @var string * - * @Assert\NotBlank() - * @ORM\Column(name="url", type="text", nullable=true) + * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) + */ + private $hashedUrl; + + /** + * From where user retrieved/found the url (an other article, a twitter, or the given_url if non are provided). + * + * @var string + * + * @ORM\Column(name="origin_url", type="text", nullable=true) * * @Groups({"entries_for_user", "export_all"}) */ - private $url; + private $originUrl; /** + * Define the url entered by the user (without redirections). + * * @var string * - * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) + * @ORM\Column(name="given_url", type="text", nullable=true) + * + * @Groups({"entries_for_user", "export_all"}) */ - private $hashedUrl; + private $givenUrl; + + /** + * @var string + * + * @ORM\Column(name="hashed_given_url", type="string", length=40, nullable=true) + */ + private $hashedGivenUrl; /** * @var bool @@ -273,15 +294,6 @@ class Entry */ private $tags; - /** - * @var string - * - * @ORM\Column(name="origin_url", type="text", nullable=true) - * - * @Groups({"entries_for_user", "export_all"}) - */ - private $originUrl; - /* * @param User $user */ @@ -325,30 +337,6 @@ class Entry return $this->title; } - /** - * Set given url. - * - * @param string $givenUrl - * - * @return Entry - */ - public function setGivenUrl($givenUrl) - { - $this->givenUrl = $givenUrl; - - return $this; - } - - /** - * Get given Url. - * - * @return string - */ - public function getGivenUrl() - { - return $this->givenUrl; - } - /** * Set url. * @@ -956,6 +944,31 @@ class Entry return $this->originUrl; } + /** + * Set origin url. + * + * @param string $givenUrl + * + * @return Entry + */ + public function setGivenUrl($givenUrl) + { + $this->givenUrl = $givenUrl; + $this->hashedGivenUrl = UrlHasher::hashUrl($givenUrl); + + return $this; + } + + /** + * Get origin url. + * + * @return string + */ + public function getGivenUrl() + { + return $this->givenUrl; + } + /** * @return string */ diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 0d6a412d..5901df8b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -76,9 +76,10 @@ class ContentProxy // Not sure what are the other possible cases where this property is empty if (empty($entry->getUrl()) && !empty($url)) { $entry->setUrl($url); - $entry->setGivenUrl($url); } + $entry->setGivenUrl($url); + $this->stockEntry($entry, $content); } diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 299b0b27..8b29aad2 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -368,7 +368,7 @@ class EntryRepository extends EntityRepository { $res = $this->createQueryBuilder('e') ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) - // ->orWhere('e.givenUrl = :url')->setParameter('url', $url) + ->orWhere('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl) ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) ->getQuery() ->getResult(); -- cgit v1.2.3 From 70df4c335965a9562cc24d3ccea0a6ed1a23b7b1 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 5 Jun 2019 10:51:06 +0200 Subject: Use two indexes instead of one for hashed urls When using `OR` in a where clause, a composite index can't be used. We should use a `UNION` to take advantages of it. Instead, create 2 indexes on each hashed urls and make 2 queries to find an url. It'll be faster than the previous solution. --- src/Wallabag/CoreBundle/Entity/Entry.php | 3 ++- src/Wallabag/CoreBundle/Repository/EntryRepository.php | 13 ++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 304dd1b3..19f81c0f 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -27,7 +27,8 @@ use Wallabag\UserBundle\Entity\User; * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="uid", columns={"uid"}), - * @ORM\Index(name="hashed_urls_user_id", columns={"user_id", "hashed_url", "hashed_given_url"}, options={"lengths"={null, 40, 40}}) + * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}), + * @ORM\Index(name="hashed_given_url_user_id", columns={"user_id", "hashed_given_url"}, options={"lengths"={null, 40}}) * } * ) * @ORM\HasLifecycleCallbacks() diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 8b29aad2..7772e0b7 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -366,9 +366,20 @@ class EntryRepository extends EntityRepository */ public function findByHashedUrlAndUserId($hashedUrl, $userId) { + // try first using hashed_url (to use the database index) $res = $this->createQueryBuilder('e') ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) - ->orWhere('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl) + ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) + ->getQuery() + ->getResult(); + + if (\count($res)) { + return current($res); + } + + // then try using hashed_given_url (to use the database index) + $res = $this->createQueryBuilder('e') + ->where('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl) ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) ->getQuery() ->getResult(); -- cgit v1.2.3 From d8809f70ea3a2f88635827b37af23b2fc2a93db6 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 5 Jun 2019 10:54:43 +0200 Subject: Typos --- src/Wallabag/CoreBundle/Entity/Entry.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 19f81c0f..4a9cb8d8 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -946,7 +946,7 @@ class Entry } /** - * Set origin url. + * Set given url. * * @param string $givenUrl * @@ -961,7 +961,7 @@ class Entry } /** - * Get origin url. + * Get given url. * * @return string */ -- cgit v1.2.3