From 31e276fc1636b41b03b7c29127681de257c16b06 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Thu, 2 May 2019 21:19:20 +1000 Subject: EntryRestController::getEntriesExistsAction: always find by hashed url Simplify the logic from #3158 by hashing all the urls from the request, and only doing a search by hash. This allows to get performance benefits from the new indexed hash column even when using older clients that do not hash the URL in the request. Fixes: #3158, #3919 Signed-off-by: Olivier Mehani --- .../ApiBundle/Controller/EntryRestController.php | 71 ++++++++++++---------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index aff0534a..17b53a01 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -43,50 +43,59 @@ class EntryRestController extends WallabagRestController $returnId = (null === $request->query->get('return_id')) ? false : (bool) $request->query->get('return_id'); - $urls = $request->query->get('urls', []); $hashedUrls = $request->query->get('hashed_urls', []); + $hashedUrl = $request->query->get('hashed_url', ''); + if (!empty($hashedUrl)) { + $hashedUrls[] = $hashedUrl; + } - // handle multiple urls first - if (!empty($hashedUrls)) { - $results = []; - foreach ($hashedUrls as $hashedUrl) { - $res = $repo->findByHashedUrlAndUserId($hashedUrl, $this->getUser()->getId()); - - $results[$hashedUrl] = $this->returnExistInformation($res, $returnId); - } + $urls = $request->query->get('urls', []); + $url = $request->query->get('url', ''); + if (!empty($url)) { + $urls[] = $url; + } - return $this->sendResponse($results); + $urlHashMap = []; + foreach($urls as $urlToHash) { + $urlHash = hash('sha1', $urlToHash); // XXX: the hash logic would better be in a separate util to avoid duplication with GenerateUrlHashesCommand::generateHashedUrls + $hashedUrls[] = $urlHash; + $urlHashMap[$urlHash] = $urlToHash; } - // @deprecated, to be remove in 3.0 - if (!empty($urls)) { - $results = []; - foreach ($urls as $url) { - $res = $repo->findByUrlAndUserId($url, $this->getUser()->getId()); + if (empty($hashedUrls)) { + throw $this->createAccessDeniedException('URL is empty?, logged user id: ' . $this->getUser()->getId()); + } - $results[$url] = $this->returnExistInformation($res, $returnId); - } + $results = []; + foreach ($hashedUrls as $hashedUrlToSearch) { + $res = $repo->findByHashedUrlAndUserId($hashedUrlToSearch, $this->getUser()->getId()); - return $this->sendResponse($results); + $results[$hashedUrlToSearch] = $this->returnExistInformation($res, $returnId); } - // let's see if it is a simple url? - $url = $request->query->get('url', ''); - $hashedUrl = $request->query->get('hashed_url', ''); + $results = $this->replaceUrlHashes($results, $urlHashMap); - if (empty($url) && empty($hashedUrl)) { - throw $this->createAccessDeniedException('URL is empty?, logged user id: ' . $this->getUser()->getId()); + if (!empty($url) || !empty($hashedUrl)) { + $hu = array_keys($results)[0]; + return $this->sendResponse(['exists' => $results[$hu]]); } + return $this->sendResponse($results); + } - $method = 'findByUrlAndUserId'; - if (!empty($hashedUrl)) { - $method = 'findByHashedUrlAndUserId'; - $url = $hashedUrl; + /** + * Replace the hashedUrl keys in $results with the unhashed URL from the + * request, as recorded in $urlHashMap. + */ + private function replaceUrlHashes(array $results, array $urlHashMap) { + $newResults = []; + foreach($results as $hash => $res) { + if (isset($urlHashMap[$hash])) { + $newResults[$urlHashMap[$hash]] = $res; + } else { + $newResults[$hash] = $res; + } } - - $res = $repo->$method($url, $this->getUser()->getId()); - - return $this->sendResponse(['exists' => $this->returnExistInformation($res, $returnId)]); + return $newResults; } /** -- cgit v1.2.3 From d5744bf0dfdbee4dbbe380d8a076d07b89fc76e6 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Fri, 3 May 2019 22:23:04 +1000 Subject: Delegate findByUrlAndUserId to findByHashedUrlAndUserId Signed-off-by: Olivier Mehani --- src/Wallabag/CoreBundle/Repository/EntryRepository.php | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 3990932e..960b682d 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -348,17 +348,9 @@ class EntryRepository extends EntityRepository */ public function findByUrlAndUserId($url, $userId) { - $res = $this->createQueryBuilder('e') - ->where('e.url = :url')->setParameter('url', urldecode($url)) - ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) - ->getQuery() - ->getResult(); - - if (\count($res)) { - return current($res); - } - - return false; + return $this->findByHashedUrlAndUserId( + hash('sha1', $url), // XXX: the hash logic would better be in a separate util to avoid duplication with GenerateUrlHashesCommand::generateHashedUrls + $userId); } /** -- cgit v1.2.3 From 4a5516376bf4c8b0cdc1e81d24ce1cca68425785 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Fri, 10 May 2019 22:07:55 +1000 Subject: Add Wallabag\CoreBundle\Helper\UrlHasher Signed-off-by: Olivier Mehani --- .../ApiBundle/Controller/EntryRestController.php | 41 ++++++++++++---------- .../Command/GenerateUrlHashesCommand.php | 5 ++- src/Wallabag/CoreBundle/Entity/Entry.php | 3 +- src/Wallabag/CoreBundle/Helper/UrlHasher.php | 22 ++++++++++++ .../CoreBundle/Repository/EntryRepository.php | 29 ++++++++++++++- 5 files changed, 79 insertions(+), 21 deletions(-) create mode 100644 src/Wallabag/CoreBundle/Helper/UrlHasher.php diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index 17b53a01..77eb489e 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -14,6 +14,7 @@ use Wallabag\CoreBundle\Entity\Entry; use Wallabag\CoreBundle\Entity\Tag; use Wallabag\CoreBundle\Event\EntryDeletedEvent; use Wallabag\CoreBundle\Event\EntrySavedEvent; +use Wallabag\CoreBundle\Helper\UrlHasher; class EntryRestController extends WallabagRestController { @@ -56,8 +57,8 @@ class EntryRestController extends WallabagRestController } $urlHashMap = []; - foreach($urls as $urlToHash) { - $urlHash = hash('sha1', $urlToHash); // XXX: the hash logic would better be in a separate util to avoid duplication with GenerateUrlHashesCommand::generateHashedUrls + foreach ($urls as $urlToHash) { + $urlHash = UrlHasher::hashUrl($urlToHash); $hashedUrls[] = $urlHash; $urlHashMap[$urlHash] = $urlToHash; } @@ -77,25 +78,11 @@ class EntryRestController extends WallabagRestController if (!empty($url) || !empty($hashedUrl)) { $hu = array_keys($results)[0]; + return $this->sendResponse(['exists' => $results[$hu]]); } - return $this->sendResponse($results); - } - /** - * Replace the hashedUrl keys in $results with the unhashed URL from the - * request, as recorded in $urlHashMap. - */ - private function replaceUrlHashes(array $results, array $urlHashMap) { - $newResults = []; - foreach($results as $hash => $res) { - if (isset($urlHashMap[$hash])) { - $newResults[$urlHashMap[$hash]] = $res; - } else { - $newResults[$hash] = $res; - } - } - return $newResults; + return $this->sendResponse($results); } /** @@ -815,6 +802,24 @@ class EntryRestController extends WallabagRestController return $this->sendResponse($results); } + /** + * Replace the hashedUrl keys in $results with the unhashed URL from the + * request, as recorded in $urlHashMap. + */ + private function replaceUrlHashes(array $results, array $urlHashMap) + { + $newResults = []; + foreach ($results as $hash => $res) { + if (isset($urlHashMap[$hash])) { + $newResults[$urlHashMap[$hash]] = $res; + } else { + $newResults[$hash] = $res; + } + } + + return $newResults; + } + /** * Retrieve value from the request. * Used for POST & PATCH on a an entry. diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php index 45bd8c5f..775b0413 100644 --- a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -7,6 +7,7 @@ use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; use Symfony\Component\Console\Input\InputArgument; use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Output\OutputInterface; +use Wallabag\CoreBundle\Helper\UrlHasher; use Wallabag\UserBundle\Entity\User; class GenerateUrlHashesCommand extends ContainerAwareCommand @@ -65,7 +66,9 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand $i = 1; foreach ($entries as $entry) { - $entry->setHashedUrl(hash('sha1', $entry->getUrl())); + $entry->setHashedUrl( + UrlHasher::hashUrl($entry->getUrl()) + ); $em->persist($entry); if (0 === ($i % 20)) { diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index c3fb87d2..1b4367fd 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -13,6 +13,7 @@ use JMS\Serializer\Annotation\XmlRoot; use Symfony\Component\Validator\Constraints as Assert; use Wallabag\AnnotationBundle\Entity\Annotation; use Wallabag\CoreBundle\Helper\EntityTimestampsTrait; +use Wallabag\CoreBundle\Helper\UrlHasher; use Wallabag\UserBundle\Entity\User; /** @@ -324,7 +325,7 @@ class Entry public function setUrl($url) { $this->url = $url; - $this->hashedUrl = hash('sha1', $url); + $this->hashedUrl = UrlHasher::hashUrl($url); return $this; } diff --git a/src/Wallabag/CoreBundle/Helper/UrlHasher.php b/src/Wallabag/CoreBundle/Helper/UrlHasher.php new file mode 100644 index 00000000..e44f219a --- /dev/null +++ b/src/Wallabag/CoreBundle/Helper/UrlHasher.php @@ -0,0 +1,22 @@ +findByHashedUrlAndUserId( - hash('sha1', $url), // XXX: the hash logic would better be in a separate util to avoid duplication with GenerateUrlHashesCommand::generateHashedUrls + UrlHasher::hashUrl($url), $userId); } @@ -506,6 +507,32 @@ class EntryRepository extends EntityRepository return $this->find($randomId); } + /** + * Inject a UrlHasher. + * + * @param UrlHasher $hasher + */ + public function setUrlHasher(UrlHasher $hasher) + { + $this->urlHasher = $hasher; + } + + /** + * Get the UrlHasher, or create a default one if not injected. + * + * XXX: the default uses the default hash algorithm + * + * @return UrlHasher + */ + protected function getUrlHasher() + { + if (!isset($this->urlHasher)) { + $this->setUrlHasher(new UrlHasher()); + } + + return $this->urlHasher; + } + /** * Return a query builder to be used by other getBuilderFor* method. * -- cgit v1.2.3 From 0132ccd2a2e73a831fa198940c369bcdd5249e8b Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Fri, 24 May 2019 15:15:12 +0200 Subject: Change the way to define algorithm for hashing url --- .../CoreBundle/Command/GenerateUrlHashesCommand.php | 6 ++---- src/Wallabag/CoreBundle/Helper/UrlHasher.php | 13 +++++++------ src/Wallabag/CoreBundle/Repository/EntryRepository.php | 3 ++- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php index 775b0413..8f2bff11 100644 --- a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -66,9 +66,7 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand $i = 1; foreach ($entries as $entry) { - $entry->setHashedUrl( - UrlHasher::hashUrl($entry->getUrl()) - ); + $entry->setHashedUrl(UrlHasher::hashUrl($entry->getUrl())); $em->persist($entry); if (0 === ($i % 20)) { @@ -87,7 +85,7 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand * * @param string $username * - * @return \Wallabag\UserBundle\Entity\User + * @return User */ private function getUser($username) { diff --git a/src/Wallabag/CoreBundle/Helper/UrlHasher.php b/src/Wallabag/CoreBundle/Helper/UrlHasher.php index e44f219a..d123eaba 100644 --- a/src/Wallabag/CoreBundle/Helper/UrlHasher.php +++ b/src/Wallabag/CoreBundle/Helper/UrlHasher.php @@ -7,16 +7,17 @@ namespace Wallabag\CoreBundle\Helper; */ class UrlHasher { - /** @var string */ - const ALGORITHM = 'sha1'; - /** + * Hash the given url using the given algorithm. + * Hashed url are faster to be retrieved in the database than the real url. + * * @param string $url + * @param string $algorithm * - * @return string hashed $url + * @return string */ - public static function hashUrl(string $url) + public static function hashUrl(string $url, $algorithm = 'sha1') { - return hash(static::ALGORITHM, $url); + return hash($algorithm, urldecode($url)); } } diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 37fc1000..7c4a05ed 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -351,7 +351,8 @@ class EntryRepository extends EntityRepository { return $this->findByHashedUrlAndUserId( UrlHasher::hashUrl($url), - $userId); + $userId + ); } /** -- cgit v1.2.3 From 629a3797bcef33943df8ef5631328e05d12634ed Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Fri, 24 May 2019 15:43:30 +0200 Subject: Remove useless methods Also fix a phpdoc block --- .../ApiBundle/Controller/EntryRestController.php | 4 ++-- .../CoreBundle/Repository/EntryRepository.php | 26 ---------------------- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index 77eb489e..bdd02129 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -848,8 +848,8 @@ class EntryRestController extends WallabagRestController /** * Return information about the entry if it exist and depending on the id or not. * - * @param Entry|null $entry - * @param bool $returnId + * @param Entry|bool|null $entry + * @param bool $returnId * * @return bool|int */ diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 7c4a05ed..880e7c65 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -508,32 +508,6 @@ class EntryRepository extends EntityRepository return $this->find($randomId); } - /** - * Inject a UrlHasher. - * - * @param UrlHasher $hasher - */ - public function setUrlHasher(UrlHasher $hasher) - { - $this->urlHasher = $hasher; - } - - /** - * Get the UrlHasher, or create a default one if not injected. - * - * XXX: the default uses the default hash algorithm - * - * @return UrlHasher - */ - protected function getUrlHasher() - { - if (!isset($this->urlHasher)) { - $this->setUrlHasher(new UrlHasher()); - } - - return $this->urlHasher; - } - /** * Return a query builder to be used by other getBuilderFor* method. * -- cgit v1.2.3