From d5fbb570c974fe8b6f64356772f7cd60b96419da Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Sun, 28 May 2017 14:53:04 +0200 Subject: Hash the urls to check if they exist Signed-off-by: Thomas Citharel --- .../ApiBundle/Controller/EntryRestController.php | 27 +++--- .../Command/GenerateUrlHashesCommand.php | 95 ++++++++++++++++++++++ .../CoreBundle/DataFixtures/ORM/LoadEntryData.php | 1 + src/Wallabag/CoreBundle/Entity/Entry.php | 30 ++++++- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 1 + .../Controller/EntryRestControllerTest.php | 16 ++-- 6 files changed, 150 insertions(+), 20 deletions(-) create mode 100644 src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index 4801811d..2b07cb59 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -21,8 +21,8 @@ class EntryRestController extends WallabagRestController * * @ApiDoc( * parameters={ - * {"name"="url", "dataType"="string", "required"=true, "format"="An url", "description"="Url to check if it exists"}, - * {"name"="urls", "dataType"="string", "required"=false, "format"="An array of urls (?urls[]=http...&urls[]=http...)", "description"="Urls (as an array) to check if it exists"} + * {"name"="hashedurl", "dataType"="string", "required"=true, "format"="An url", "description"="SHA512 Url to check if it exists"}, + * {"name"="hashedurls", "dataType"="string", "required"=false, "format"="An array of urls (?urls[]=http...&urls[]=http...)", "description"="SHA512 Urls (as an array) to check if it exists"} * } * ) * @@ -32,33 +32,38 @@ class EntryRestController extends WallabagRestController { $this->validateAuthentication(); - $urls = $request->query->get('urls', []); + $hashedUrls = $request->query->get('hashedurls', []); // handle multiple urls first - if (!empty($urls)) { + if (!empty($hashedUrls)) { $results = []; - foreach ($urls as $url) { + foreach ($hashedUrls as $hashedUrl) { $res = $this->getDoctrine() ->getRepository('WallabagCoreBundle:Entry') - ->findByUrlAndUserId($url, $this->getUser()->getId()); + ->findOneBy([ + 'hashedUrl' => $hashedUrl, + 'user' => $this->getUser()->getId(), + ]); - $results[$url] = $res instanceof Entry ? $res->getId() : false; + $results[$hashedUrl] = $res instanceof Entry ? $res->getId() : false; } return $this->sendResponse($results); } // let's see if it is a simple url? - $url = $request->query->get('url', ''); + $hashedUrl = $request->query->get('hashedurl', ''); - if (empty($url)) { + if (empty($hashedUrl)) { throw $this->createAccessDeniedException('URL is empty?, logged user id: '.$this->getUser()->getId()); } $res = $this->getDoctrine() ->getRepository('WallabagCoreBundle:Entry') - ->findByUrlAndUserId($url, $this->getUser()->getId()); - + ->findOneBy([ + 'hashedUrl' => $hashedUrl, + 'user' => $this->getUser()->getId(), + ]); $exists = $res instanceof Entry ? $res->getId() : false; return $this->sendResponse(['exists' => $exists]); diff --git a/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php new file mode 100644 index 00000000..fe2644f2 --- /dev/null +++ b/src/Wallabag/CoreBundle/Command/GenerateUrlHashesCommand.php @@ -0,0 +1,95 @@ +setName('wallabag:generate-hashed-urls') + ->setDescription('Generates hashed urls for each entry') + ->setHelp('This command helps you to generates hashes of the url of each entry, to check through API if an URL is already saved') + ->addArgument( + 'username', + InputArgument::OPTIONAL, + 'User to process entries' + ); + } + + protected function execute(InputInterface $input, OutputInterface $output) + { + $this->output = $output; + + $username = $input->getArgument('username'); + + if ($username) { + try { + $user = $this->getUser($username); + $this->generateHashedUrls($user); + } catch (NoResultException $e) { + $output->writeln(sprintf('User "%s" not found.', $username)); + + return 1; + } + } else { + $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll(); + + $output->writeln(sprintf('Generating hashed urls for the %d user account entries', count($users))); + + foreach ($users as $user) { + $output->writeln(sprintf('Processing user %s', $user->getUsername())); + $this->generateHashedUrls($user); + } + $output->writeln(sprintf('Finished generated hashed urls')); + } + + return 0; + } + + /** + * @param User $user + */ + private function generateHashedUrls(User $user) + { + $em = $this->getContainer()->get('doctrine.orm.entity_manager'); + $repo = $this->getDoctrine()->getRepository('WallabagCoreBundle:Entry'); + + $entries = $repo->findByUser($user->getId()); + + foreach ($entries as $entry) { + $entry->setHashedUrl(hash('sha512', $entry->getUrl())); + $em->persist($entry); + $em->flush(); + } + + $this->output->writeln(sprintf('Generated hashed urls for user %s', $user->getUserName())); + } + + /** + * Fetches a user from its username. + * + * @param string $username + * + * @return \Wallabag\UserBundle\Entity\User + */ + private function getUser($username) + { + return $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findOneByUserName($username); + } + + private function getDoctrine() + { + return $this->getContainer()->get('doctrine'); + } +} diff --git a/src/Wallabag/CoreBundle/DataFixtures/ORM/LoadEntryData.php b/src/Wallabag/CoreBundle/DataFixtures/ORM/LoadEntryData.php index fedad009..22882612 100644 --- a/src/Wallabag/CoreBundle/DataFixtures/ORM/LoadEntryData.php +++ b/src/Wallabag/CoreBundle/DataFixtures/ORM/LoadEntryData.php @@ -32,6 +32,7 @@ class LoadEntryData extends AbstractFixture implements OrderedFixtureInterface $entry2 = new Entry($this->getReference('admin-user')); $entry2->setUrl('http://0.0.0.0/entry2'); + $entry2->setHashedUrl(hash('sha512', 'http://0.0.0.0/entry2')); $entry2->setReadingTime(1); $entry2->setDomainName('domain.io'); $entry2->setMimetype('text/html'); diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 08a67c34..49affe2a 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php @@ -24,7 +24,8 @@ use Wallabag\AnnotationBundle\Entity\Annotation; * options={"collate"="utf8mb4_unicode_ci", "charset"="utf8mb4"}, * indexes={ * @ORM\Index(name="created_at", columns={"created_at"}), - * @ORM\Index(name="uid", columns={"uid"}) + * @ORM\Index(name="uid", columns={"uid"}), + * @ORM\Index(name="hashedurl", columns={"hashedurl"}) * } * ) * @ORM\HasLifecycleCallbacks() @@ -72,6 +73,13 @@ class Entry */ private $url; + /** + * @var string + * + * @ORM\Column(name="hashedurl", type="text", nullable=true) + */ + private $hashedUrl; + /** * @var bool * @@ -763,4 +771,24 @@ class Entry return $this; } + + /** + * @return string + */ + public function getHashedUrl() + { + return $this->hashedUrl; + } + + /** + * @param mixed $hashedUrl + * + * @return Entry + */ + public function setHashedUrl($hashedUrl) + { + $this->hashedUrl = $hashedUrl; + + return $this; + } } diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 9a08db3d..83ecaa66 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -75,6 +75,7 @@ class ContentProxy } $entry->setUrl($content['url'] ?: $url); + $entry->setHashedUrl(hash('sha512', $entry->getUrl())); $entry->setTitle($title); $entry->setContent($html); $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); diff --git a/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php b/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php index bf7d373a..e3a44390 100644 --- a/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php +++ b/tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php @@ -686,7 +686,7 @@ class EntryRestControllerTest extends WallabagApiTestCase public function testGetEntriesExists() { - $this->client->request('GET', '/api/entries/exists?url=http://0.0.0.0/entry2'); + $this->client->request('GET', '/api/entries/exists?hashedurl=' . hash('sha512', 'http://0.0.0.0/entry2')); $this->assertEquals(200, $this->client->getResponse()->getStatusCode()); @@ -699,21 +699,21 @@ class EntryRestControllerTest extends WallabagApiTestCase { $url1 = 'http://0.0.0.0/entry2'; $url2 = 'http://0.0.0.0/entry10'; - $this->client->request('GET', '/api/entries/exists?urls[]='.$url1.'&urls[]='.$url2); + $this->client->request('GET', '/api/entries/exists?hashedurls[]='.hash('sha512',$url1).'&hashedurls[]='.hash('sha512',$url2)); $this->assertEquals(200, $this->client->getResponse()->getStatusCode()); $content = json_decode($this->client->getResponse()->getContent(), true); - $this->assertArrayHasKey($url1, $content); - $this->assertArrayHasKey($url2, $content); - $this->assertEquals(2, $content[$url1]); - $this->assertEquals(false, $content[$url2]); + $this->assertArrayHasKey(hash('sha512', $url1), $content); + $this->assertArrayHasKey(hash('sha512', $url2), $content); + $this->assertEquals(2, $content[hash('sha512', $url1)]); + $this->assertEquals(false, $content[hash('sha512', $url2)]); } public function testGetEntriesExistsWhichDoesNotExists() { - $this->client->request('GET', '/api/entries/exists?url=http://google.com/entry2'); + $this->client->request('GET', '/api/entries/exists?hashedurl='.hash('sha512','http://google.com/entry2')); $this->assertEquals(200, $this->client->getResponse()->getStatusCode()); @@ -724,7 +724,7 @@ class EntryRestControllerTest extends WallabagApiTestCase public function testGetEntriesExistsWithNoUrl() { - $this->client->request('GET', '/api/entries/exists?url='); + $this->client->request('GET', '/api/entries/exists?hashedurl='); $this->assertEquals(403, $this->client->getResponse()->getStatusCode()); } -- cgit v1.2.3