From bfe02a0b481055bb4e799200c8daa9a0ad987c71 Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Sun, 28 May 2017 14:53:04 +0200 Subject: [PATCH] Hash the urls to check if they exist Signed-off-by: Thomas Citharel --- .../Controller/EntryRestController.php | 36 +++++-- .../Command/GenerateUrlHashesCommand.php | 95 ++++++++++++++++ .../CoreBundle/DataFixtures/EntryFixtures.php | 1 + src/Wallabag/CoreBundle/Entity/Entry.php | 30 +++++- .../CoreBundle/Helper/ContentProxy.php | 2 + .../Controller/EntryRestControllerTest.php | 55 +++++++++- .../Command/GenerateUrlHashesCommandTest.php | 101 ++++++++++++++++++ 7 files changed, 306 insertions(+), 14 deletions(-) create mode 100644 src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php create mode 100644 tests/Wallabag/CoreBundle/Command/GenerateUrlHashesCommandTest.php diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index 5c850091..26746f7d 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -29,6 +29,8 @@ class EntryRestController extends WallabagRestController * {"name"="return_id", "dataType"="string", "required"=false, "format"="1 or 0", "description"="Set 1 if you want to retrieve ID in case entry(ies) exists, 0 by default"}, * {"name"="url", "dataType"="string", "required"=true, "format"="An url", "description"="Url to check if it exists"}, * {"name"="urls", "dataType"="string", "required"=false, "format"="An array of urls (?urls[]=http...&urls[]=http...)", "description"="Urls (as an array) to check if it exists"} + * {"name"="hashedurl", "dataType"="string", "required"=true, "format"="An MD5 hash of an url", "description"="MD5 hash of the url to check if it exists"}, + * {"name"="hashedurls", "dataType"="string", "required"=false, "format"="An array of MD5-hashed urls (?hashedurls[]=xxx...&hashedurls[]=xxx...)", "description"="MD5 hashes of the urls (as an array) to check if they exist"} * } * ) * @@
-41,34 +43,46 @@ class EntryRestController extends WallabagRestController $returnId = (null === $request->query->get('return_id')) ? false : (bool) $request->query->get('return_id'); $urls = $request->query->get('urls', []); + $hashedUrls = $request->query->get('hashedurls', []); + // handle multiple urls first - if (!empty($urls)) { + if (!empty($hashedUrls)) { $results = []; - foreach ($urls as $url) { + foreach ($hashedUrls as $hashedUrl) { $res = $this->getDoctrine() ->getRepository('WallabagCoreBundle:Entry') - ->findByUrlAndUserId($url, $this->getUser()->getId()); + ->findOneBy([ + 'hashedUrl' => $hashedUrl, + 'user' => $this->getUser()->getId(), + ]); - $results[$url] = $this->returnExistInformation($res, $returnId); + // $results[$url] = $this->returnExistInformation($res, $returnId); + $results[$hashedUrl] = $this->returnExistInformation($res, $returnId); } return $this->sendResponse($results); } // let's see if it is a simple url? - $url = $request->query->get('url', ''); + $hashedUrl = $request->query->get('hashedurl', ''); + + // if (empty($url)) { + // throw $this->createAccessDeniedException('URL is empty?, logged user id: ' . $this->getUser()->getId()); + // } - if (empty($url)) { - throw $this->createAccessDeniedException('URL is empty?, logged user id: ' . 
$this->getUser()->getId()); + if (empty($hashedUrl)) { + throw $this->createAccessDeniedException('URL is empty?, logged user id: '.$this->getUser()->getId()); } $res = $this->getDoctrine() ->getRepository('WallabagCoreBundle:Entry') - ->findByUrlAndUserId($url, $this->getUser()->getId()); - - $exists = $this->returnExistInformation($res, $returnId); + // ->findByUrlAndUserId($url, $this->getUser()->getId()); + ->findOneBy([ + 'hashedUrl' => $hashedUrl, + 'user' => $this->getUser()->getId(), + ]); - return $this->sendResponse(['exists' => $exists]); + return $this->sendResponse(['exists' => $this->returnExistInformation($res, $returnId)]); } /** diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php new file mode 100644 index 00000000..fe2644f2 --- /dev/null +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -0,0 +1,95 @@ +setName('wallabag:generate-hashed-urls') + ->setDescription('Generates hashed urls for each entry') + ->setHelp('This command helps you to generates hashes of the url of each entry, to check through API if an URL is already saved') + ->addArgument( + 'username', + InputArgument::OPTIONAL, + 'User to process entries' + ); + } + + protected function execute(InputInterface $input, OutputInterface $output) + { + $this->output = $output; + + $username = $input->getArgument('username'); + + if ($username) { + try { + $user = $this->getUser($username); + $this->generateHashedUrls($user); + } catch (NoResultException $e) { + $output->writeln(sprintf('User "%s" not found.', $username)); + + return 1; + } + } else { + $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll(); + + $output->writeln(sprintf('Generating hashed urls for the %d user account entries', count($users))); + + foreach ($users as $user) { + $output->writeln(sprintf('Processing user %s', $user->getUsername())); + $this->generateHashedUrls($user); + } + 
$output->writeln(sprintf('Finished generated hashed urls')); + } + + return 0; + } + + /** + * @param User $user + */ + private function generateHashedUrls(User $user) + { + $em = $this->getContainer()->get('doctrine.orm.entity_manager'); + $repo = $this->getDoctrine()->getRepository('WallabagCoreBundle:Entry'); + + $entries = $repo->findByUser($user->getId()); + + foreach ($entries as $entry) { + $entry->setHashedUrl(hash('sha512', $entry->getUrl())); + $em->persist($entry); + $em->flush(); + } + + $this->output->writeln(sprintf('Generated hashed urls for user %s', $user->getUserName())); + } + + /** + * Fetches a user from its username. + * + * @param string $username + * + * @return \Wallabag\UserBundle\Entity\User + */ + private function getUser($username) + { + return $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findOneByUserName($username); + } + + private function getDoctrine() + { + return $this->getContainer()->get('doctrine'); + } +} diff --git a/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php b/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php index 024fcfdc..9c10500d 100644 --- a/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php +++ b/src/Wallabag/CoreBundle/DataFixtures/EntryFixtures.php @@ -30,6 +30,7 @@ class EntryFixtures extends Fixture implements DependentFixtureInterface 'entry2' => [ 'user' => 'admin-user', 'url' => 'http://0.0.0.0/entry2', + 'hashed_url' => hash('md5', 'http://0.0.0.0/entry2'), 'reading_time' => 1, 'domain' => 'domain.io', 'mime' => 'text/html', diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index b3cfdc4a..17a1ed58 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -25,7 +25,8 @@ use Wallabag\UserBundle\Entity\User; * options={"collate"="utf8mb4_unicode_ci", "charset"="utf8mb4"}, * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), - * @ORM\Index(name="uid", columns={"uid"}) 
+ * @ORM\Index(name="uid", columns={"uid"}), + * @ORM\Index(name="hashedurl", columns={"hashedurl"}) * } * ) * @ORM\HasLifecycleCallbacks() @@ -75,6 +76,13 @@ class Entry */ private $url; + /** + * @var string + * + * @ORM\Column(name="hashedurl", type="text", nullable=true) + */ + private $hashedUrl; + /** * @var bool * @@ -911,4 +919,24 @@ class Entry { return $this->originUrl; } + + /** + * @return string + */ + public function getHashedUrl() + { + return $this->hashedUrl; + } + + /** + * @param mixed $hashedUrl + * + * @return Entry + */ + public function setHashedUrl($hashedUrl) + { + $this->hashedUrl = $hashedUrl; + + return $this; + } } diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 31953f12..0534d27b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -248,6 +248,8 @@ class ContentProxy { $this->updateOriginUrl($entry, $content['url']); + $entry->setHashedUrl(hash('md5', $entry->getUrl())); + $this->setEntryDomainName($entry); if (!empty($content['title'])) { diff --git a/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php b/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php index 2151f587..8d96d7b8 100644 --- a/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php +++ b/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php @@ -987,6 +987,8 @@ class EntryRestControllerTest extends WallabagApiTestCase { $this->client->request('GET', '/api/entries/exists?url=http://0.0.0.0/entry2'); + $this->client->request('GET', '/api/entries/exists?hashedurl=' . 
hash('md5', 'http://0.0.0.0/entry2')); + $this->assertSame(200, $this->client->getResponse()->getStatusCode()); $content = json_decode($this->client->getResponse()->getContent(), true); @@ -994,10 +996,22 @@ class EntryRestControllerTest extends WallabagApiTestCase $this->assertTrue($content['exists']); } + public function testGetEntriesExistsWithHash() + { + $this->client->request('GET', '/api/entries/exists?hashedurl=' . hash('md5', 'http://0.0.0.0/entry2')); + + $this->assertSame(200, $this->client->getResponse()->getStatusCode()); + + $content = json_decode($this->client->getResponse()->getContent(), true); + + $this->assertSame(2, $content['exists']); + } + public function testGetEntriesExistsWithManyUrls() { $url1 = 'http://0.0.0.0/entry2'; $url2 = 'http://0.0.0.0/entry10'; + $this->client->request('GET', '/api/entries/exists?urls[]=' . $url1 . '&urls[]=' . $url2 . '&return_id=1'); $this->assertSame(200, $this->client->getResponse()->getStatusCode()); @@ -1027,9 +1041,46 @@ class EntryRestControllerTest extends WallabagApiTestCase $this->assertFalse($content[$url2]); } + public function testGetEntriesExistsWithManyUrlsHashed() + { + $url1 = 'http://0.0.0.0/entry2'; + $url2 = 'http://0.0.0.0/entry10'; + $this->client->request('GET', '/api/entries/exists?hashedurls[]='.hash('md5',$url1).'&hashedurls[]='.hash('md5',$url2) . 
'&return_id=1'); + + $this->assertSame(200, $this->client->getResponse()->getStatusCode()); + + $content = json_decode($this->client->getResponse()->getContent(), true); + + $this->assertArrayHasKey($url1, $content); + $this->assertArrayHasKey($url2, $content); + $this->assertSame(2, $content[$url1]); + $this->assertNull($content[$url2]); + + $this->assertArrayHasKey(hash('md5', $url1), $content); + $this->assertArrayHasKey(hash('md5', $url2), $content); + $this->assertEquals(2, $content[hash('md5', $url1)]); + $this->assertEquals(false, $content[hash('md5', $url2)]); + } + + public function testGetEntriesExistsWithManyUrlsHashedReturnBool() + { + $url1 = 'http://0.0.0.0/entry2'; + $url2 = 'http://0.0.0.0/entry10'; + $this->client->request('GET', '/api/entries/exists?hashedurls[]='.hash('md5',$url1).'&hashedurls[]='.hash('md5',$url2)); + + $this->assertSame(200, $this->client->getResponse()->getStatusCode()); + + $content = json_decode($this->client->getResponse()->getContent(), true); + + $this->assertArrayHasKey($url1, $content); + $this->assertArrayHasKey($url2, $content); + $this->assertTrue($content[$url1]); + $this->assertFalse($content[$url2]); + } + public function testGetEntriesExistsWhichDoesNotExists() { - $this->client->request('GET', '/api/entries/exists?url=http://google.com/entry2'); + $this->client->request('GET', '/api/entries/exists?hashedurl='.hash('md5','http://google.com/entry2')); $this->assertSame(200, $this->client->getResponse()->getStatusCode()); @@ -1040,7 +1091,7 @@ class EntryRestControllerTest extends WallabagApiTestCase public function testGetEntriesExistsWithNoUrl() { - $this->client->request('GET', '/api/entries/exists?url='); + $this->client->request('GET', '/api/entries/exists?hashedurl='); $this->assertSame(403, $this->client->getResponse()->getStatusCode()); } diff --git a/tests/Wallabag/CoreBundle/Command/GenerateUrlHashesCommandTest.php b/tests/Wallabag/CoreBundle/Command/GenerateUrlHashesCommandTest.php new file mode 100644 
index 00000000..8ca772cb --- /dev/null +++ b/tests/Wallabag/CoreBundle/Command/GenerateUrlHashesCommandTest.php @@ -0,0 +1,101 @@ +getClient()->getKernel()); + $application->add(new GenerateUrlHashesCommand()); + + $command = $application->find('wallabag:generate-hashed-urls'); + + $tester = new CommandTester($command); + $tester->execute([ + 'command' => $command->getName(), + ]); + + $this->assertContains('Generating hashed urls for the 3 user account entries', $tester->getDisplay()); + $this->assertContains('Finished generated hashed urls', $tester->getDisplay()); + } + + public function testRunGenerateUrlHashesCommandWithBadUsername() + { + $application = new Application($this->getClient()->getKernel()); + $application->add(new GenerateUrlHashesCommand()); + + $command = $application->find('wallabag:generate-hashed-urls'); + + $tester = new CommandTester($command); + $tester->execute([ + 'command' => $command->getName(), + 'username' => 'unknown', + ]); + + $this->assertContains('User "unknown" not found', $tester->getDisplay()); + } + + public function testRunGenerateUrlHashesCommandForUser() + { + $application = new Application($this->getClient()->getKernel()); + $application->add(new GenerateUrlHashesCommand()); + + $command = $application->find('wallabag:generate-hashed-urls'); + + $tester = new CommandTester($command); + $tester->execute([ + 'command' => $command->getName(), + 'username' => 'admin', + ]); + + $this->assertContains('Generated hashed urls for user admin', $tester->getDisplay()); + } + + public function testGenerateUrls() + { + $url = 'http://www.lemonde.fr/sport/visuel/2017/05/05/rondelle-prison-blanchissage-comprendre-le-hockey-sur-glace_5122587_3242.html'; + $client = $this->getClient(); + $em = $client->getContainer()->get('doctrine.orm.entity_manager'); + + $this->logInAs('admin'); + + $user = $em->getRepository('WallabagUserBundle:User')->findOneById($this->getLoggedInUserId()); + + $entry1 = new Entry($user); + $entry1->setUrl($url); + 
+ $em->persist($entry1); + + $em->flush(); + + $this->assertNull($entry1->getHashedUrl()); + + $application = new Application($this->getClient()->getKernel()); + $application->add(new GenerateUrlHashesCommand()); + + $command = $application->find('wallabag:generate-hashed-urls'); + + $tester = new CommandTester($command); + $tester->execute([ + 'command' => $command->getName(), + 'username' => 'admin', + ]); + + $this->assertContains('Generated hashed urls for user admin', $tester->getDisplay()); + + $entry = $em->getRepository('WallabagCoreBundle:Entry')->findOneByUrl($url); + + $this->assertEquals($entry->getHashedUrl(), hash('sha512', $url)); + + $query = $em->createQuery('DELETE FROM Wallabag\CoreBundle\Entity\Entry e WHERE e.url = :url'); + $query->setParameter('url', $url); + $query->execute(); + } +} -- 2.41.0