From bfe02a0b481055bb4e799200c8daa9a0ad987c71 Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Sun, 28 May 2017 14:53:04 +0200 Subject: Hash the urls to check if they exist Signed-off-by: Thomas Citharel --- .../Command/GenerateUrlHashesCommand.php | 95 ++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php (limited to 'src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php') diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php new file mode 100644 index 00000000..fe2644f2 --- /dev/null +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -0,0 +1,95 @@ +setName('wallabag:generate-hashed-urls') + ->setDescription('Generates hashed urls for each entry') + ->setHelp('This command helps you to generates hashes of the url of each entry, to check through API if an URL is already saved') + ->addArgument( + 'username', + InputArgument::OPTIONAL, + 'User to process entries' + ); + } + + protected function execute(InputInterface $input, OutputInterface $output) + { + $this->output = $output; + + $username = $input->getArgument('username'); + + if ($username) { + try { + $user = $this->getUser($username); + $this->generateHashedUrls($user); + } catch (NoResultException $e) { + $output->writeln(sprintf('User "%s" not found.', $username)); + + return 1; + } + } else { + $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll(); + + $output->writeln(sprintf('Generating hashed urls for the %d user account entries', count($users))); + + foreach ($users as $user) { + $output->writeln(sprintf('Processing user %s', $user->getUsername())); + $this->generateHashedUrls($user); + } + $output->writeln(sprintf('Finished generated hashed urls')); + } + + return 0; + } + + /** + * @param User $user + */ + private function generateHashedUrls(User $user) + { + $em = $this->getContainer()->get('doctrine.orm.entity_manager'); + $repo = $this->getDoctrine()->getRepository('WallabagCoreBundle:Entry'); + + $entries = $repo->findByUser($user->getId()); + + foreach ($entries as $entry) { + $entry->setHashedUrl(hash('sha512', $entry->getUrl())); + $em->persist($entry); + $em->flush(); + } + + $this->output->writeln(sprintf('Generated hashed urls for user %s', $user->getUserName())); + } + + /** + * Fetches a user from its username. + * + * @param string $username + * + * @return \Wallabag\UserBundle\Entity\User + */ + private function getUser($username) + { + return $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findOneByUserName($username); + } + + private function getDoctrine() + { + return $this->getContainer()->get('doctrine'); + } +} -- cgit v1.2.3 From 9c2b2aae70b06411336e6eb6ac43b3ebd30dc38c Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 1 Apr 2019 11:50:33 +0200 Subject: Keep url in exists endpoint - Add migration - Use md5 instead of sha512 (we don't need security here, just a hash) - Update tests --- .../CoreBundle/Command/GenerateUrlHashesCommand.php | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php') diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php index fe2644f2..fb598390 100644 --- a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -45,13 +45,13 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand } else { $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll(); - $output->writeln(sprintf('Generating hashed urls for the %d user account entries', count($users))); + $output->writeln(sprintf('Generating hashed urls for "%d" users', \count($users))); foreach ($users as $user) { - $output->writeln(sprintf('Processing user %s', $user->getUsername())); + $output->writeln(sprintf('Processing user: %s', $user->getUsername())); $this->generateHashedUrls($user); } - $output->writeln(sprintf('Finished generated hashed urls')); + $output->writeln('Finished generated hashed urls'); } return 0; @@ -67,13 +67,20 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand $entries = $repo->findByUser($user->getId()); + $i = 1; foreach ($entries as $entry) { - $entry->setHashedUrl(hash('sha512', $entry->getUrl())); + $entry->setHashedUrl(hash('md5', $entry->getUrl())); $em->persist($entry); - $em->flush(); + + if (0 === ($i % 20)) { + $em->flush(); + } + ++$i; } - $this->output->writeln(sprintf('Generated hashed urls for user %s', $user->getUserName())); + $em->flush(); + + $this->output->writeln(sprintf('Generated hashed urls for user: %s', $user->getUserName())); } /** -- cgit v1.2.3 From 8a6456629814039cfc623cdb279bcba06dacff50 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 1 Apr 2019 13:51:57 +0200 Subject: Use a better index for hashed_url It'll most often be used in addition to the `user_id`. Also, automatically generate the hash when saving the url. Switch from `md5` to `sha1`. --- src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php') diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php index fb598390..685e1672 100644 --- a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -69,7 +69,7 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand $i = 1; foreach ($entries as $entry) { - $entry->setHashedUrl(hash('md5', $entry->getUrl())); + $entry->setHashedUrl(hash('sha1', $entry->getUrl())); $em->persist($entry); if (0 === ($i % 20)) { -- cgit v1.2.3 From c579ce2306297674c56376a2ab5c8ba66a272253 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 1 Apr 2019 14:34:20 +0200 Subject: Some cleanup Also, do not run the hashed_url migration into a Doctrine migration --- src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php') diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php index 685e1672..45bd8c5f 100644 --- a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -20,18 +20,14 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand ->setName('wallabag:generate-hashed-urls') ->setDescription('Generates hashed urls for each entry') ->setHelp('This command helps you to generates hashes of the url of each entry, to check through API if an URL is already saved') - ->addArgument( - 'username', - InputArgument::OPTIONAL, - 'User to process entries' - ); + ->addArgument('username', InputArgument::OPTIONAL, 'User to process entries'); } protected function execute(InputInterface $input, OutputInterface $output) { $this->output = $output; - $username = $input->getArgument('username'); + $username = (string) $input->getArgument('username'); if ($username) { try { -- cgit v1.2.3 From 4a5516376bf4c8b0cdc1e81d24ce1cca68425785 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Fri, 10 May 2019 22:07:55 +1000 Subject: Add Wallabag\CoreBundle\Helper\UrlHasher Signed-off-by: Olivier Mehani --- src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php') diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php index 45bd8c5f..775b0413 100644 --- a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -7,6 +7,7 @@ use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; use Symfony\Component\Console\Input\InputArgument; use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Output\OutputInterface; +use Wallabag\CoreBundle\Helper\UrlHasher; use Wallabag\UserBundle\Entity\User; class GenerateUrlHashesCommand extends ContainerAwareCommand @@ -65,7 +66,9 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand $i = 1; foreach ($entries as $entry) { - $entry->setHashedUrl(hash('sha1', $entry->getUrl())); + $entry->setHashedUrl( + UrlHasher::hashUrl($entry->getUrl()) + ); $em->persist($entry); if (0 === ($i % 20)) { -- cgit v1.2.3 From 0132ccd2a2e73a831fa198940c369bcdd5249e8b Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Fri, 24 May 2019 15:15:12 +0200 Subject: Change the way to define algorithm for hashing url --- src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php') diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php index 775b0413..8f2bff11 100644 --- a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -66,9 +66,7 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand $i = 1; foreach ($entries as $entry) { - $entry->setHashedUrl( - UrlHasher::hashUrl($entry->getUrl()) - ); + $entry->setHashedUrl(UrlHasher::hashUrl($entry->getUrl())); $em->persist($entry); if (0 === ($i % 20)) { @@ -87,7 +85,7 @@ class GenerateUrlHashesCommand extends ContainerAwareCommand * * @param string $username * - * @return \Wallabag\UserBundle\Entity\User + * @return User */ private function getUser($username) { -- cgit v1.2.3