diff options
author | Jérémy Benoist <j0k3r@users.noreply.github.com> | 2017-05-05 17:42:18 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-05 17:42:18 +0200 |
commit | ab742ee9c69f8cf6e6295d6044e05accffc5551d (patch) | |
tree | 503f54518cd9df45434903714eff2a5ab71c8a7c /src | |
parent | 69803049688179e1b03ef424dec91f1b9a4f9e91 (diff) | |
parent | 4eeb29ff784934fa879dd87999e07c4c7626af8c (diff) | |
download | wallabag-ab742ee9c69f8cf6e6295d6044e05accffc5551d.tar.gz wallabag-ab742ee9c69f8cf6e6295d6044e05accffc5551d.tar.zst wallabag-ab742ee9c69f8cf6e6295d6044e05accffc5551d.zip |
Merge pull request #2920 from wallabag/cleanduplicatescommand
Clean Duplicates Command
Diffstat (limited to 'src')
-rw-r--r-- | src/Wallabag/CoreBundle/Command/CleanDuplicatesCommand.php | 119 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Repository/EntryRepository.php | 30 |
2 files changed, 149 insertions, 0 deletions
diff --git a/src/Wallabag/CoreBundle/Command/CleanDuplicatesCommand.php b/src/Wallabag/CoreBundle/Command/CleanDuplicatesCommand.php new file mode 100644 index 00000000..65f35d8e --- /dev/null +++ b/src/Wallabag/CoreBundle/Command/CleanDuplicatesCommand.php | |||
@@ -0,0 +1,119 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Wallabag\CoreBundle\Command; | ||
4 | |||
5 | use Doctrine\ORM\NoResultException; | ||
6 | use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; | ||
7 | use Symfony\Component\Console\Input\InputArgument; | ||
8 | use Symfony\Component\Console\Input\InputInterface; | ||
9 | use Symfony\Component\Console\Output\OutputInterface; | ||
10 | use Wallabag\CoreBundle\Entity\Entry; | ||
11 | use Wallabag\UserBundle\Entity\User; | ||
12 | |||
13 | class CleanDuplicatesCommand extends ContainerAwareCommand | ||
14 | { | ||
15 | /** @var OutputInterface */ | ||
16 | protected $output; | ||
17 | |||
18 | protected $duplicates = 0; | ||
19 | |||
20 | protected function configure() | ||
21 | { | ||
22 | $this | ||
23 | ->setName('wallabag:clean-duplicates') | ||
24 | ->setDescription('Cleans the database for duplicates') | ||
25 | ->setHelp('This command helps you to clean your articles list in case of duplicates') | ||
26 | ->addArgument( | ||
27 | 'username', | ||
28 | InputArgument::OPTIONAL, | ||
29 | 'User to clean' | ||
30 | ); | ||
31 | } | ||
32 | |||
33 | protected function execute(InputInterface $input, OutputInterface $output) | ||
34 | { | ||
35 | $this->output = $output; | ||
36 | |||
37 | $username = $input->getArgument('username'); | ||
38 | |||
39 | if ($username) { | ||
40 | try { | ||
41 | $user = $this->getUser($username); | ||
42 | $this->cleanDuplicates($user); | ||
43 | } catch (NoResultException $e) { | ||
44 | $output->writeln(sprintf('<error>User "%s" not found.</error>', $username)); | ||
45 | |||
46 | return 1; | ||
47 | } | ||
48 | } else { | ||
49 | $users = $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findAll(); | ||
50 | |||
51 | $output->writeln(sprintf('Cleaning through %d user accounts', count($users))); | ||
52 | |||
53 | foreach ($users as $user) { | ||
54 | $output->writeln(sprintf('Processing user %s', $user->getUsername())); | ||
55 | $this->cleanDuplicates($user); | ||
56 | } | ||
57 | $output->writeln(sprintf('Finished cleaning. %d duplicates found in total', $this->duplicates)); | ||
58 | } | ||
59 | |||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | /** | ||
64 | * @param User $user | ||
65 | */ | ||
66 | private function cleanDuplicates(User $user) | ||
67 | { | ||
68 | $em = $this->getContainer()->get('doctrine.orm.entity_manager'); | ||
69 | $repo = $this->getDoctrine()->getRepository('WallabagCoreBundle:Entry'); | ||
70 | |||
71 | $entries = $repo->getAllEntriesIdAndUrl($user->getId()); | ||
72 | |||
73 | $duplicatesCount = 0; | ||
74 | $urls = []; | ||
75 | foreach ($entries as $entry) { | ||
76 | $url = $this->similarUrl($entry['url']); | ||
77 | |||
78 | /* @var $entry Entry */ | ||
79 | if (in_array($url, $urls)) { | ||
80 | ++$duplicatesCount; | ||
81 | |||
82 | $em->remove($repo->find($entry['id'])); | ||
83 | $em->flush(); // Flushing at the end of the loop would require the instance not being online | ||
84 | } else { | ||
85 | $urls[] = $entry['url']; | ||
86 | } | ||
87 | } | ||
88 | |||
89 | $this->duplicates += $duplicatesCount; | ||
90 | |||
91 | $this->output->writeln(sprintf('Cleaned %d duplicates for user %s', $duplicatesCount, $user->getUserName())); | ||
92 | } | ||
93 | |||
94 | private function similarUrl($url) | ||
95 | { | ||
96 | if (in_array(substr($url, -1), ['/', '#'])) { // get rid of "/" and "#" and the end of urls | ||
97 | return substr($url, 0, strlen($url)); | ||
98 | } | ||
99 | |||
100 | return $url; | ||
101 | } | ||
102 | |||
103 | /** | ||
104 | * Fetches a user from its username. | ||
105 | * | ||
106 | * @param string $username | ||
107 | * | ||
108 | * @return \Wallabag\UserBundle\Entity\User | ||
109 | */ | ||
110 | private function getUser($username) | ||
111 | { | ||
112 | return $this->getDoctrine()->getRepository('WallabagUserBundle:User')->findOneByUserName($username); | ||
113 | } | ||
114 | |||
115 | private function getDoctrine() | ||
116 | { | ||
117 | return $this->getContainer()->get('doctrine'); | ||
118 | } | ||
119 | } | ||
diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 1f22e901..6972e974 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php | |||
@@ -379,4 +379,34 @@ class EntryRepository extends EntityRepository | |||
379 | ->setParameter('userId', $userId) | 379 | ->setParameter('userId', $userId) |
380 | ->execute(); | 380 | ->execute(); |
381 | } | 381 | } |
382 | |||
383 | /** | ||
384 | * Get id and url from all entries | ||
385 | * Used for the clean-duplicates command. | ||
386 | */ | ||
387 | public function getAllEntriesIdAndUrl($userId) | ||
388 | { | ||
389 | $qb = $this->createQueryBuilder('e') | ||
390 | ->select('e.id, e.url') | ||
391 | ->where('e.user = :userid')->setParameter(':userid', $userId); | ||
392 | |||
393 | return $qb->getQuery()->getArrayResult(); | ||
394 | } | ||
395 | |||
396 | /** | ||
397 | * Find all entries by url and owner. | ||
398 | * | ||
399 | * @param $url | ||
400 | * @param $userId | ||
401 | * | ||
402 | * @return array | ||
403 | */ | ||
404 | public function findAllByUrlAndUserId($url, $userId) | ||
405 | { | ||
406 | return $this->createQueryBuilder('e') | ||
407 | ->where('e.url = :url')->setParameter('url', urldecode($url)) | ||
408 | ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) | ||
409 | ->getQuery() | ||
410 | ->getResult(); | ||
411 | } | ||
382 | } | 412 | } |