From e2f3800ccb884682547769d9e4b5d6b7cafe4e07 Mon Sep 17 00:00:00 2001
From: Thomas Citharel
Date: Fri, 24 Feb 2017 11:27:03 +0100
Subject: Add Clean Duplicates Command

---
 .../CoreBundle/Command/CleanDuplicatesCommand.php  | 119 +++++++++++++++++++++
 .../CoreBundle/Repository/EntryRepository.php      |  13 +++
 2 files changed, 132 insertions(+)
 create mode 100644 src/Wallabag/CoreBundle/Command/CleanDuplicatesCommand.php

(limited to 'src/Wallabag/CoreBundle')

diff --git a/src/Wallabag/CoreBundle/Command/CleanDuplicatesCommand.php b/src/Wallabag/CoreBundle/Command/CleanDuplicatesCommand.php
new file mode 100644
index 00000000..65f35d8e
--- /dev/null
+++ b/src/Wallabag/CoreBundle/Command/CleanDuplicatesCommand.php
@@ -0,0 +1,119 @@
+setName('wallabag:clean-duplicates')
+            ->setDescription('Cleans the database for duplicates')
+            ->setHelp('This command helps you to clean your articles list in case of duplicates')
+            ->addArgument(
+                'username',
+                InputArgument::OPTIONAL,
+                'User to clean'
+            );
+    }
+
+    protected function execute(InputInterface $input, OutputInterface $output)
+    {
+        $this->output = $output;
+
+        $username = $input->getArgument('username');
+
+        if ($username) {
+            try {
+                $user = $this->getUser($username);
+                $this->cleanDuplicates($user);
+            } catch (NoResultException $e) {
+                $output->writeln(sprintf('User "%s" not found.', $username));
+
+                return 1;
+            }
+        } else {
+            $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll();
+
+            $output->writeln(sprintf('Cleaning through %d user accounts', count($users)));
+
+            foreach ($users as $user) {
+                $output->writeln(sprintf('Processing user %s', $user->getUsername()));
+                $this->cleanDuplicates($user);
+            }
+            $output->writeln(sprintf('Finished cleaning. %d duplicates found in total', $this->duplicates));
+        }
+
+        return 0;
+    }
+
+    /**
+     * @param User $user Account whose entries are scanned; every later entry sharing a normalized URL is removed
+     */
+    private function cleanDuplicates(User $user)
+    {
+        $em = $this->getContainer()->get('doctrine.orm.entity_manager');
+        $repo = $this->getDoctrine()->getRepository('WallabagCoreBundle:Entry');
+
+        $entries = $repo->getAllEntriesIdAndUrl($user->getId());
+
+        $duplicatesCount = 0;
+        $urls = [];
+        foreach ($entries as $entry) {
+            $url = $this->similarUrl($entry['url']);
+
+            // $entry is an array row ('id', 'url'), not an Entry object; the entity is loaded only when it is removed
+            if (isset($urls[$url])) {
+                ++$duplicatesCount;
+
+                $em->remove($repo->find($entry['id']));
+                $em->flush(); // Flushing at the end of the loop would require the instance not being online
+            } else {
+                $urls[$url] = true; // index by the normalized URL so later variants of it are detected
+            }
+        }
+
+        $this->duplicates += $duplicatesCount;
+
+        $this->output->writeln(sprintf('Cleaned %d duplicates for user %s', $duplicatesCount, $user->getUsername()));
+    }
+
+    private function similarUrl($url)
+    {
+        if (in_array(substr($url, -1), ['/', '#'], true)) { // drop one trailing "/" or "#" so such variants compare equal
+            return substr($url, 0, -1);
+        }
+
+        return $url;
+    }
+
+    /**
+     * Fetches a user from its username.
+     *
+     * @param string $username
+     *
+     * @return \Wallabag\UserBundle\Entity\User
+     */
+    private function getUser($username)
+    {
+        return $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findOneByUserName($username);
+    }
+
+    private function getDoctrine()
+    {
+        return $this->getContainer()->get('doctrine');
+    }
+}
diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php
index 1f22e901..5e7b0d3a 100644
--- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php
+++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php
@@ -379,4 +379,17 @@ class EntryRepository extends EntityRepository
             ->setParameter('userId', $userId)
             ->execute();
     }
+
+    /**
+     * Get id and url from all entries
+     * Used for the clean-duplicates command.
+     */
+    public function getAllEntriesIdAndUrl($userId)
+    {
+        $qb = $this->createQueryBuilder('e')
+            ->select('e.id, e.url')
+            ->where('e.user = :userid')->setParameter('userid', $userId);
+
+        return $qb->getQuery()->getArrayResult();
+    }
 }
-- 
cgit v1.2.3

From d09fe4d233477d5cb9bfc613799b05a7ca14e270 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Fri, 5 May 2017 14:33:36 +0200
Subject: Added test for deduplication

---
 .../CoreBundle/Repository/EntryRepository.php      | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'src/Wallabag/CoreBundle')

diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php
index 5e7b0d3a..2e03fa19 100644
--- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php
+++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php
@@ -392,4 +392,23 @@ class EntryRepository extends EntityRepository
 
         return $qb->getQuery()->getArrayResult();
     }
+
+    /**
+     * Find all entries by url and owner.
+     *
+     * @param $url
+     * @param $userId
+     *
+     * @return array
+     */
+    public function findAllByUrlAndUserId($url, $userId)
+    {
+        $res = $this->createQueryBuilder('e')
+            ->where('e.url = :url')->setParameter('url', urldecode($url))
+            ->andWhere('e.user = :user_id')->setParameter('user_id', $userId)
+            ->getQuery()
+            ->getResult();
+
+        return $res;
+    }
 }
-- 
cgit v1.2.3

From 89f108b45ae94cd827595461b39f869111092579 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20L=C5=93uillet?=
Date: Fri, 5 May 2017 14:54:03 +0200
Subject: Fixed @j0k3r review

---
 src/Wallabag/CoreBundle/Repository/EntryRepository.php | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src/Wallabag/CoreBundle')

diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php
index 2e03fa19..6972e974 100644
--- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php
+++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php
@@ -403,12 +403,10 @@ class EntryRepository extends EntityRepository
      */
     public function findAllByUrlAndUserId($url, $userId)
     {
-        $res = $this->createQueryBuilder('e')
+        return $this->createQueryBuilder('e')
             ->where('e.url = :url')->setParameter('url', urldecode($url))
             ->andWhere('e.user = :user_id')->setParameter('user_id', $userId)
             ->getQuery()
             ->getResult();
-
-        return $res;
     }
 }
-- 
cgit v1.2.3