From bfe02a0b481055bb4e799200c8daa9a0ad987c71 Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Sun, 28 May 2017 14:53:04 +0200 Subject: Hash the urls to check if they exist Signed-off-by: Thomas Citharel --- .../ApiBundle/Controller/EntryRestController.php | 36 +++++--- .../Command/GenerateUrlHashesCommand.php | 95 ++++++++++++++++++++++ .../CoreBundle/DataFixtures/EntryFixtures.php | 1 + src/Wallabag/CoreBundle/Entity/Entry.php | 30 ++++++- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 2 + 5 files changed, 152 insertions(+), 12 deletions(-) create mode 100644 src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php (limited to 'src/Wallabag') diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index 5c850091..26746f7d 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -29,6 +29,8 @@ class EntryRestController extends WallabagRestController * {"name"="return_id", "dataType"="string", "required"=false, "format"="1 or 0", "description"="Set 1 if you want to retrieve ID in case entry(ies) exists, 0 by default"}, * {"name"="url", "dataType"="string", "required"=true, "format"="An url", "description"="Url to check if it exists"}, * {"name"="urls", "dataType"="string", "required"=false, "format"="An array of urls (?urls[]=http...&urls[]=http...)", "description"="Urls (as an array) to check if it exists"} + * {"name"="hashedurl", "dataType"="string", "required"=true, "format"="An url", "description"="Md5 url to check if it exists"}, + * {"name"="hashedurls", "dataType"="string", "required"=false, "format"="An array of urls (?urls[]=http...&urls[]=http...)", "description"="Md5 urls (as an array) to check if it exists"} * } * ) * @@ -41,34 +43,46 @@ class EntryRestController extends WallabagRestController $returnId = (null === $request->query->get('return_id')) ? false : (bool) $request->query->get('return_id'); $urls = $request->query->get('urls', []); + $hashedUrls = $request->query->get('hashedurls', []); + // handle multiple urls first - if (!empty($urls)) { + if (!empty($hashedUrls)) { $results = []; - foreach ($urls as $url) { + foreach ($hashedUrls as $hashedUrl) { $res = $this->getDoctrine() ->getRepository('WallabagCoreBundle:Entry') - ->findByUrlAndUserId($url, $this->getUser()->getId()); + ->findOneBy([ + 'hashedUrl' => $hashedUrl, + 'user' => $this->getUser()->getId(), + ]); - $results[$url] = $this->returnExistInformation($res, $returnId); + // $results[$url] = $this->returnExistInformation($res, $returnId); + $results[$hashedUrl] = $this->returnExistInformation($res, $returnId); } return $this->sendResponse($results); } // let's see if it is a simple url? - $url = $request->query->get('url', ''); + $hashedUrl = $request->query->get('hashedurl', ''); + + // if (empty($url)) { + // throw $this->createAccessDeniedException('URL is empty?, logged user id: ' . $this->getUser()->getId()); + // } - if (empty($url)) { - throw $this->createAccessDeniedException('URL is empty?, logged user id: ' . $this->getUser()->getId()); + if (empty($hashedUrl)) { + throw $this->createAccessDeniedException('URL is empty?, logged user id: '.$this->getUser()->getId()); } $res = $this->getDoctrine() ->getRepository('WallabagCoreBundle:Entry') - ->findByUrlAndUserId($url, $this->getUser()->getId()); - - $exists = $this->returnExistInformation($res, $returnId); + // ->findByUrlAndUserId($url, $this->getUser()->getId()); + ->findOneBy([ + 'hashedUrl' => $hashedUrl, + 'user' => $this->getUser()->getId(), + ]); - return $this->sendResponse(['exists' => $exists]); + return $this->sendResponse(['exists' => $this->returnExistInformation($res, $returnId)]); } /** diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php new file mode 100644 index 00000000..fe2644f2 --- /dev/null +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -0,0 +1,95 @@ +setName('wallabag:generate-hashed-urls') + ->setDescription('Generates hashed urls for each entry') + ->setHelp('This command helps you to generates hashes of the url of each entry, to check through API if an URL is already saved') + ->addArgument( + 'username', + InputArgument::OPTIONAL, + 'User to process entries' + ); + } + + protected function execute(InputInterface $input, OutputInterface $output) + { + $this->output = $output; + + $username = $input->getArgument('username'); + + if ($username) { + try { + $user = $this->getUser($username); + $this->generateHashedUrls($user); + } catch (NoResultException $e) { + $output->writeln(sprintf('User "%s" not found.', $username)); + + return 1; + } + } else { + $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll(); + + $output->writeln(sprintf('Generating hashed urls for the %d user account entries', count($users))); + + foreach ($users as $user) { + $output->writeln(sprintf('Processing user %s', $user->getUsername())); + $this->generateHashedUrls($user); + } + $output->writeln(sprintf('Finished generated hashed urls')); + } + + return 0; + } + + /** + * @param User $user + */ + private function generateHashedUrls(User $user) + { + $em = $this->getContainer()->get('doctrine.orm.entity_manager'); + $repo = $this->getDoctrine()->getRepository('WallabagCoreBundle:Entry'); + + $entries = $repo->findByUser($user->getId()); + + foreach ($entries as $entry) { + $entry->setHashedUrl(hash('sha512', $entry->getUrl())); + $em->persist($entry); + $em->flush(); + } + + $this->output->writeln(sprintf('Generated hashed urls for user %s', $user->getUserName())); + } + + /** + * Fetches a user from its username. + * + * @param string $username + * + * @return \Wallabag\UserBundle\Entity\User + */ + private function getUser($username) + { + return $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findOneByUserName($username); + } + + private function getDoctrine() + { + return $this->getContainer()->get('doctrine'); + } +} diff --git a/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php b/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php index 024fcfdc..9c10500d 100644 --- a/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php +++ b/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php @@ -30,6 +30,7 @@ class EntryFixtures extends Fixture implements DependentFixtureInterface 'entry2' => [ 'user' => 'admin-user', 'url' => 'http://0.0.0.0/entry2', + 'hashed_url' => hash('md5', 'http://0.0.0.0/entry2'), 'reading_time' => 1, 'domain' => 'domain.io', 'mime' => 'text/html', diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index b3cfdc4a..17a1ed58 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -25,7 +25,8 @@ use Wallabag\UserBundle\Entity\User; * options={"collate"="utf8mb4_unicode_ci", "charset"="utf8mb4"}, * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), - * @ORM\Index(name="uid", columns={"uid"}) + * @ORM\Index(name="uid", columns={"uid"}), + * @ORM\Index(name="hashedurl", columns={"hashedurl"}) * } * ) * @ORM\HasLifecycleCallbacks() @@ -75,6 +76,13 @@ class Entry */ private $url; + /** + * @var string + * + * @ORM\Column(name="hashedurl", type="text", nullable=true) + */ + private $hashedUrl; + /** * @var bool * @@ -911,4 +919,24 @@ class Entry { return $this->originUrl; } + + /** + * @return string + */ + public function getHashedUrl() + { + return $this->hashedUrl; + } + + /** + * @param mixed $hashedUrl + * + * @return Entry + */ + public function setHashedUrl($hashedUrl) + { + $this->hashedUrl = $hashedUrl; + + return $this; + } } diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 31953f12..0534d27b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -248,6 +248,8 @@ class ContentProxy { $this->updateOriginUrl($entry, $content['url']); + $entry->setHashedUrl(hash('md5', $entry->getUrl())); + $this->setEntryDomainName($entry); if (!empty($content['title'])) { -- cgit v1.2.3