From bfe02a0b481055bb4e799200c8daa9a0ad987c71 Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Sun, 28 May 2017 14:53:04 +0200 Subject: Hash the urls to check if they exist Signed-off-by: Thomas Citharel --- .../Command/GenerateUrlHashesCommand.php | 95 ++++++++++++++++++++++ .../CoreBundle/DataFixtures/EntryFixtures.php | 1 + src/Wallabag/CoreBundle/Entity/Entry.php | 30 ++++++- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 2 + 4 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php (limited to 'src/Wallabag/CoreBundle') diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php new file mode 100644 index 00000000..fe2644f2 --- /dev/null +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -0,0 +1,95 @@ +setName('wallabag:generate-hashed-urls') + ->setDescription('Generates hashed urls for each entry') + ->setHelp('This command helps you to generates hashes of the url of each entry, to check through API if an URL is already saved') + ->addArgument( + 'username', + InputArgument::OPTIONAL, + 'User to process entries' + ); + } + + protected function execute(InputInterface $input, OutputInterface $output) + { + $this->output = $output; + + $username = $input->getArgument('username'); + + if ($username) { + try { + $user = $this->getUser($username); + $this->generateHashedUrls($user); + } catch (NoResultException $e) { + $output->writeln(sprintf('User "%s" not found.', $username)); + + return 1; + } + } else { + $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll(); + + $output->writeln(sprintf('Generating hashed urls for the %d user account entries', count($users))); + + foreach ($users as $user) { + $output->writeln(sprintf('Processing user %s', $user->getUsername())); + $this->generateHashedUrls($user); + } + $output->writeln(sprintf('Finished generated hashed urls')); + } + + return 0; + } + + /** + * @param User $user + */ + private function generateHashedUrls(User $user) + { + $em = $this->getContainer()->get('doctrine.orm.entity_manager'); + $repo = $this->getDoctrine()->getRepository('WallabagCoreBundle:Entry'); + + $entries = $repo->findByUser($user->getId()); + + foreach ($entries as $entry) { + $entry->setHashedUrl(hash('sha512', $entry->getUrl())); + $em->persist($entry); + $em->flush(); + } + + $this->output->writeln(sprintf('Generated hashed urls for user %s', $user->getUserName())); + } + + /** + * Fetches a user from its username. + * + * @param string $username + * + * @return \Wallabag\UserBundle\Entity\User + */ + private function getUser($username) + { + return $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findOneByUserName($username); + } + + private function getDoctrine() + { + return $this->getContainer()->get('doctrine'); + } +} diff --git a/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php b/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php index 024fcfdc..9c10500d 100644 --- a/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php +++ b/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php @@ -30,6 +30,7 @@ class EntryFixtures extends Fixture implements DependentFixtureInterface 'entry2' => [ 'user' => 'admin-user', 'url' => 'http://0.0.0.0/entry2', + 'hashed_url' => hash('md5', 'http://0.0.0.0/entry2'), 'reading_time' => 1, 'domain' => 'domain.io', 'mime' => 'text/html', diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index b3cfdc4a..17a1ed58 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -25,7 +25,8 @@ use Wallabag\UserBundle\Entity\User; * options={"collate"="utf8mb4_unicode_ci", "charset"="utf8mb4"}, * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), - * @ORM\Index(name="uid", columns={"uid"}) + * @ORM\Index(name="uid", columns={"uid"}), + * @ORM\Index(name="hashedurl", columns={"hashedurl"}) * } * ) * @ORM\HasLifecycleCallbacks() @@ -75,6 +76,13 @@ class Entry */ private $url; + /** + * @var string + * + * @ORM\Column(name="hashedurl", type="text", nullable=true) + */ + private $hashedUrl; + /** * @var bool * @@ -911,4 +919,24 @@ class Entry { return $this->originUrl; } + + /** + * @return string + */ + public function getHashedUrl() + { + return $this->hashedUrl; + } + + /** + * @param mixed $hashedUrl + * + * @return Entry + */ + public function setHashedUrl($hashedUrl) + { + $this->hashedUrl = $hashedUrl; + + return $this; + } } diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 31953f12..0534d27b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -248,6 +248,8 @@ class ContentProxy { $this->updateOriginUrl($entry, $content['url']); + $entry->setHashedUrl(hash('md5', $entry->getUrl())); + $this->setEntryDomainName($entry); if (!empty($content['title'])) { -- cgit v1.2.3 From 9c2b2aae70b06411336e6eb6ac43b3ebd30dc38c Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 1 Apr 2019 11:50:33 +0200 Subject: Keep url in exists endpoint - Add migration - Use md5 instead of sha512 (we don't need security here, just a hash) - Update tests --- .../Command/GenerateUrlHashesCommand.php | 19 +++++++++++------ .../CoreBundle/DataFixtures/EntryFixtures.php | 2 +- src/Wallabag/CoreBundle/Entity/Entry.php | 4 ++-- .../CoreBundle/Repository/EntryRepository.php | 24 ++++++++++++++++++++++ 4 files changed, 40 insertions(+), 9 deletions(-) (limited to 'src/Wallabag/CoreBundle') diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php index fe2644f2..fb598390 100644 --- a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -45,13 +45,13 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand } else { $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll(); - $output->writeln(sprintf('Generating hashed urls for the %d user account entries', count($users))); + $output->writeln(sprintf('Generating hashed urls for "%d" users', \count($users))); foreach ($users as $user) { - $output->writeln(sprintf('Processing user %s', $user->getUsername())); + $output->writeln(sprintf('Processing user: %s', $user->getUsername())); $this->generateHashedUrls($user); } - $output->writeln(sprintf('Finished generated hashed urls')); + $output->writeln('Finished generated hashed urls'); } return 0; @@ -67,13 +67,20 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand $entries = $repo->findByUser($user->getId()); + $i = 1; foreach ($entries as $entry) { - $entry->setHashedUrl(hash('sha512', $entry->getUrl())); + $entry->setHashedUrl(hash('md5', $entry->getUrl())); $em->persist($entry); - $em->flush(); + + if (0 === ($i % 20)) { + $em->flush(); + } + ++$i; } - $this->output->writeln(sprintf('Generated hashed urls for user %s', $user->getUserName())); + $em->flush(); + + $this->output->writeln(sprintf('Generated hashed urls for user: %s', $user->getUserName())); } /** diff --git a/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php b/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php index 9c10500d..1b18cad6 100644 --- a/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php +++ b/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php @@ -30,7 +30,6 @@ class EntryFixtures extends Fixture implements DependentFixtureInterface 'entry2' => [ 'user' => 'admin-user', 'url' => 'http://0.0.0.0/entry2', - 'hashed_url' => hash('md5', 'http://0.0.0.0/entry2'), 'reading_time' => 1, 'domain' => 'domain.io', 'mime' => 'text/html', @@ -90,6 +89,7 @@ class EntryFixtures extends Fixture implements DependentFixtureInterface foreach ($entries as $reference => $item) { $entry = new Entry($this->getReference($item['user'])); $entry->setUrl($item['url']); + $entry->setHashedUrl(hash('md5', $item['url'])); $entry->setReadingTime($item['reading_time']); $entry->setDomainName($item['domain']); $entry->setMimetype($item['mime']); diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 17a1ed58..a04f101f 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -26,7 +26,7 @@ use Wallabag\UserBundle\Entity\User; * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="uid", columns={"uid"}), - * @ORM\Index(name="hashedurl", columns={"hashedurl"}) + * @ORM\Index(name="hashed_url", columns={"hashed_url"}) * } * ) * @ORM\HasLifecycleCallbacks() @@ -79,7 +79,7 @@ class Entry /** * @var string * - * @ORM\Column(name="hashedurl", type="text", nullable=true) + * @ORM\Column(name="hashed_url", type="string", length=32, nullable=true) */ private $hashedUrl; diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 45366623..0c175abb 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -346,6 +346,30 @@ class EntryRepository extends EntityRepository return false; } + /** + * Find an entry by its hashed url and its owner. + * If it exists, return the entry otherwise return false. + * + * @param $hashedUrl + * @param $userId + * + * @return Entry|bool + */ + public function findByHashedUrlAndUserId($hashedUrl, $userId) + { + $res = $this->createQueryBuilder('e') + ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', urldecode($hashedUrl)) + ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) + ->getQuery() + ->getResult(); + + if (\count($res)) { + return current($res); + } + + return false; + } + /** * Count all entries for a user. * -- cgit v1.2.3 From 8a6456629814039cfc623cdb279bcba06dacff50 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 1 Apr 2019 13:51:57 +0200 Subject: Use a better index for hashed_url It'll most often be used in addition to the `user_id`. Also, automatically generate the hash when saving the url. Switch from `md5` to `sha1`. --- src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php | 2 +- src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php | 1 - src/Wallabag/CoreBundle/Entity/Entry.php | 5 +++-- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 2 -- 4 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src/Wallabag/CoreBundle') diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php index fb598390..685e1672 100644 --- a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -69,7 +69,7 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand $i = 1; foreach ($entries as $entry) { - $entry->setHashedUrl(hash('md5', $entry->getUrl())); + $entry->setHashedUrl(hash('sha1', $entry->getUrl())); $em->persist($entry); if (0 === ($i % 20)) { diff --git a/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php b/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php index 1b18cad6..024fcfdc 100644 --- a/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php +++ b/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php @@ -89,7 +89,6 @@ class EntryFixtures extends Fixture implements DependentFixtureInterface foreach ($entries as $reference => $item) { $entry = new Entry($this->getReference($item['user'])); $entry->setUrl($item['url']); - $entry->setHashedUrl(hash('md5', $item['url'])); $entry->setReadingTime($item['reading_time']); $entry->setDomainName($item['domain']); $entry->setMimetype($item['mime']); diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index a04f101f..faf4d259 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -26,7 +26,7 @@ use Wallabag\UserBundle\Entity\User; * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="uid", columns={"uid"}), - * @ORM\Index(name="hashed_url", columns={"hashed_url"}) + * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}) * } * ) * @ORM\HasLifecycleCallbacks() @@ -79,7 +79,7 @@ class Entry /** * @var string * - * @ORM\Column(name="hashed_url", type="string", length=32, nullable=true) + * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) */ private $hashedUrl; @@ -324,6 +324,7 @@ class Entry public function setUrl($url) { $this->url = $url; + $this->hashedUrl = hash('sha1', $url); return $this; } diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 0534d27b..31953f12 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -248,8 +248,6 @@ class ContentProxy { $this->updateOriginUrl($entry, $content['url']); - $entry->setHashedUrl(hash('md5', $entry->getUrl())); - $this->setEntryDomainName($entry); if (!empty($content['title'])) { -- cgit v1.2.3 From c579ce2306297674c56376a2ab5c8ba66a272253 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 1 Apr 2019 14:34:20 +0200 Subject: Some cleanup Also, do not run the hashed_url migration into a Doctrine migration --- src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php | 8 ++------ src/Wallabag/CoreBundle/Repository/EntryRepository.php | 6 +++--- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'src/Wallabag/CoreBundle') diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php index 685e1672..45bd8c5f 100644 --- a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -20,18 +20,14 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand ->setName('wallabag:generate-hashed-urls') ->setDescription('Generates hashed urls for each entry') ->setHelp('This command helps you to generates hashes of the url of each entry, to check through API if an URL is already saved') - ->addArgument( - 'username', - InputArgument::OPTIONAL, - 'User to process entries' - ); + ->addArgument('username', InputArgument::OPTIONAL, 'User to process entries'); } protected function execute(InputInterface $input, OutputInterface $output) { $this->output = $output; - $username = $input->getArgument('username'); + $username = (string) $input->getArgument('username'); if ($username) { try { diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 0c175abb..f5089729 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php @@ -350,15 +350,15 @@ class EntryRepository extends EntityRepository * Find an entry by its hashed url and its owner. * If it exists, return the entry otherwise return false. * - * @param $hashedUrl - * @param $userId + * @param string $hashedUrl Url hashed using sha1 + * @param int $userId * * @return Entry|bool */ public function findByHashedUrlAndUserId($hashedUrl, $userId) { $res = $this->createQueryBuilder('e') - ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', urldecode($hashedUrl)) + ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) ->getQuery() ->getResult(); -- cgit v1.2.3 From 5cc0646e66f52448f83a7a458e0b60b4580e83e5 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 1 Apr 2019 15:45:17 +0200 Subject: Fix index on MySQL --- src/Wallabag/CoreBundle/Entity/Entry.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Wallabag/CoreBundle') diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index faf4d259..c3fb87d2 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -26,7 +26,7 @@ use Wallabag\UserBundle\Entity\User; * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), * @ORM\Index(name="uid", columns={"uid"}), - * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}) + * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) * } * ) * @ORM\HasLifecycleCallbacks() -- cgit v1.2.3