diff options
author | Thomas Citharel <tcit@tcit.fr> | 2016-09-21 17:47:47 +0200 |
---|---|---|
committer | Jeremy Benoist <jeremy.benoist@gmail.com> | 2016-09-25 12:29:18 +0200 |
commit | 59201088b4fc13fd361238396f630dabd9bd1990 (patch) | |
tree | 2d4d5c2fbe7f007214c41f0c4ccba2f8d3d7ec8b /src/Wallabag/ImportBundle/Import | |
parent | f7c55b38122cc593c2b58bb6425fca9d243b055e (diff) | |
download | wallabag-59201088b4fc13fd361238396f630dabd9bd1990.tar.gz wallabag-59201088b4fc13fd361238396f630dabd9bd1990.tar.zst wallabag-59201088b4fc13fd361238396f630dabd9bd1990.zip |
bring chrome and firefox as separate imports
Diffstat (limited to 'src/Wallabag/ImportBundle/Import')
-rw-r--r-- | src/Wallabag/ImportBundle/Import/BrowserImport.php | 247 | ||||
-rw-r--r-- | src/Wallabag/ImportBundle/Import/ChromeImport.php | 71 | ||||
-rw-r--r-- | src/Wallabag/ImportBundle/Import/FirefoxImport.php | 71 |
3 files changed, 262 insertions, 127 deletions
diff --git a/src/Wallabag/ImportBundle/Import/BrowserImport.php b/src/Wallabag/ImportBundle/Import/BrowserImport.php index e3457196..ef7d6d95 100644 --- a/src/Wallabag/ImportBundle/Import/BrowserImport.php +++ b/src/Wallabag/ImportBundle/Import/BrowserImport.php | |||
@@ -9,69 +9,24 @@ use Wallabag\CoreBundle\Entity\Entry; | |||
9 | use Wallabag\UserBundle\Entity\User; | 9 | use Wallabag\UserBundle\Entity\User; |
10 | use Wallabag\CoreBundle\Helper\ContentProxy; | 10 | use Wallabag\CoreBundle\Helper\ContentProxy; |
11 | 11 | ||
12 | class BrowserImport implements ImportInterface | 12 | abstract class BrowserImport extends AbstractImport |
13 | { | 13 | { |
14 | protected $user; | ||
15 | protected $em; | ||
16 | protected $logger; | ||
17 | protected $contentProxy; | ||
18 | protected $skippedEntries = 0; | ||
19 | protected $importedEntries = 0; | ||
20 | protected $totalEntries = 0; | ||
21 | protected $filepath; | 14 | protected $filepath; |
22 | protected $markAsRead; | ||
23 | private $nbEntries; | ||
24 | |||
25 | public function __construct(EntityManager $em, ContentProxy $contentProxy) | ||
26 | { | ||
27 | $this->em = $em; | ||
28 | $this->logger = new NullLogger(); | ||
29 | $this->contentProxy = $contentProxy; | ||
30 | } | ||
31 | |||
32 | public function setLogger(LoggerInterface $logger) | ||
33 | { | ||
34 | $this->logger = $logger; | ||
35 | } | ||
36 | |||
37 | /** | ||
38 | * We define the user in a custom call because on the import command there is no logged in user. | ||
39 | * So we can't retrieve user from the `security.token_storage` service. | ||
40 | * | ||
41 | * @param User $user | ||
42 | * | ||
43 | * @return $this | ||
44 | */ | ||
45 | public function setUser(User $user) | ||
46 | { | ||
47 | $this->user = $user; | ||
48 | |||
49 | return $this; | ||
50 | } | ||
51 | 15 | ||
52 | /** | 16 | /** |
53 | * {@inheritdoc} | 17 | * {@inheritdoc} |
54 | */ | 18 | */ |
55 | public function getName() | 19 | abstract public function getName(); |
56 | { | ||
57 | return 'Firefox & Google Chrome'; | ||
58 | } | ||
59 | 20 | ||
60 | /** | 21 | /** |
61 | * {@inheritdoc} | 22 | * {@inheritdoc} |
62 | */ | 23 | */ |
63 | public function getUrl() | 24 | abstract public function getUrl(); |
64 | { | ||
65 | return 'import_browser'; | ||
66 | } | ||
67 | 25 | ||
68 | /** | 26 | /** |
69 | * {@inheritdoc} | 27 | * {@inheritdoc} |
70 | */ | 28 | */ |
71 | public function getDescription() | 29 | abstract public function getDescription(); |
72 | { | ||
73 | return 'import.browser.description'; | ||
74 | } | ||
75 | 30 | ||
76 | /** | 31 | /** |
77 | * {@inheritdoc} | 32 | * {@inheritdoc} |
@@ -96,124 +51,162 @@ class BrowserImport implements ImportInterface | |||
96 | return false; | 51 | return false; |
97 | } | 52 | } |
98 | 53 | ||
99 | $this->nbEntries = 1; | 54 | if ($this->producer) { |
55 | $this->parseEntriesForProducer($data); | ||
56 | |||
57 | return true; | ||
58 | } | ||
59 | |||
100 | $this->parseEntries($data); | 60 | $this->parseEntries($data); |
101 | $this->em->flush(); | ||
102 | 61 | ||
103 | return true; | 62 | return true; |
104 | } | 63 | } |
105 | 64 | ||
106 | private function parseEntries($data) | 65 | /** |
66 | * Set file path to the json file. | ||
67 | * | ||
68 | * @param string $filepath | ||
69 | */ | ||
70 | public function setFilepath($filepath) | ||
71 | { | ||
72 | $this->filepath = $filepath; | ||
73 | |||
74 | return $this; | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * Parse and insert all given entries. | ||
79 | * | ||
80 | * @param $entries | ||
81 | */ | ||
82 | protected function parseEntries($entries) | ||
107 | { | 83 | { |
108 | foreach ($data as $importedEntry) { | 84 | $i = 1; |
109 | $this->parseEntry($importedEntry); | 85 | |
86 | foreach ($entries as $importedEntry) { | ||
87 | if ((array) $importedEntry !== $importedEntry) { | ||
88 | continue; | ||
89 | } | ||
90 | |||
91 | $entry = $this->parseEntry($importedEntry); | ||
92 | |||
93 | if (null === $entry) { | ||
94 | continue; | ||
95 | } | ||
96 | |||
97 | // flush every 20 entries | ||
98 | if (($i % 20) === 0) { | ||
99 | $this->em->flush(); | ||
100 | |||
101 | // clear only affected entities | ||
102 | $this->em->clear(Entry::class); | ||
103 | $this->em->clear(Tag::class); | ||
104 | } | ||
105 | ++$i; | ||
110 | } | 106 | } |
111 | $this->totalEntries += count($data); | 107 | |
108 | $this->em->flush(); | ||
112 | } | 109 | } |
113 | 110 | ||
114 | private function parseEntry($importedEntry) | 111 | /** |
112 | * Parse entries and send them to the queue. | ||
113 | * It should just be a simple loop on all item, no call to the database should be done | ||
114 | * to speedup queuing. | ||
115 | * | ||
116 | * Faster parse entries for Producer. | ||
117 | * We don't care to make check at this time. They'll be done by the consumer. | ||
118 | * | ||
119 | * @param array $entries | ||
120 | */ | ||
121 | protected function parseEntriesForProducer(array $entries) | ||
115 | { | 122 | { |
116 | if (!is_array($importedEntry)) { | 123 | foreach ($entries as $importedEntry) { |
117 | return; | 124 | |
125 | if ((array) $importedEntry !== $importedEntry) { | ||
126 | continue; | ||
127 | } | ||
128 | |||
129 | // set userId for the producer (it won't know which user is connected) | ||
130 | $importedEntry['userId'] = $this->user->getId(); | ||
131 | |||
132 | if ($this->markAsRead) { | ||
133 | $importedEntry = $this->setEntryAsRead($importedEntry); | ||
134 | } | ||
135 | |||
136 | ++$this->queuedEntries; | ||
137 | |||
138 | $this->producer->publish(json_encode($importedEntry)); | ||
118 | } | 139 | } |
140 | } | ||
119 | 141 | ||
120 | /* Firefox uses guid while Chrome uses id */ | 142 | /** |
143 | * {@inheritdoc} | ||
144 | */ | ||
145 | public function parseEntry(array $importedEntry) | ||
146 | { | ||
121 | 147 | ||
122 | if ((!key_exists('guid', $importedEntry) || (!key_exists('id', $importedEntry))) && is_array(reset($importedEntry))) { | 148 | if ((!key_exists('guid', $importedEntry) || (!key_exists('id', $importedEntry))) && is_array(reset($importedEntry))) { |
123 | $this->parseEntries($importedEntry); | 149 | $this->parseEntries($importedEntry); |
124 | |||
125 | return; | 150 | return; |
126 | } | 151 | } |
152 | |||
127 | if (key_exists('children', $importedEntry)) { | 153 | if (key_exists('children', $importedEntry)) { |
128 | $this->parseEntries($importedEntry['children']); | 154 | $this->parseEntries($importedEntry['children']); |
129 | |||
130 | return; | 155 | return; |
131 | } | 156 | } |
132 | if (key_exists('uri', $importedEntry) || key_exists('url', $importedEntry)) { | ||
133 | |||
134 | /* Firefox uses uri while Chrome uses url */ | ||
135 | 157 | ||
136 | $firefox = key_exists('uri', $importedEntry); | 158 | if (!key_exists('uri', $importedEntry) && !key_exists('url', $importedEntry)) { |
137 | 159 | return; | |
138 | $existingEntry = $this->em | 160 | } |
139 | ->getRepository('WallabagCoreBundle:Entry') | ||
140 | ->findByUrlAndUserId(($firefox) ? $importedEntry['uri'] : $importedEntry['url'], $this->user->getId()); | ||
141 | |||
142 | if (false !== $existingEntry) { | ||
143 | ++$this->skippedEntries; | ||
144 | 161 | ||
145 | return; | 162 | $firefox = key_exists('uri', $importedEntry); |
146 | } | ||
147 | 163 | ||
148 | if (false === parse_url(($firefox) ? $importedEntry['uri'] : $importedEntry['url']) || false === filter_var(($firefox) ? $importedEntry['uri'] : $importedEntry['url'], FILTER_VALIDATE_URL)) { | 164 | $existingEntry = $this->em |
149 | $this->logger->warning('Imported URL '.($firefox) ? $importedEntry['uri'] : $importedEntry['url'].' is not valid'); | 165 | ->getRepository('WallabagCoreBundle:Entry') |
150 | ++$this->skippedEntries; | 166 | ->findByUrlAndUserId(($firefox) ? $importedEntry['uri'] : $importedEntry['url'], $this->user->getId()); |
151 | 167 | ||
152 | return; | 168 | if (false !== $existingEntry) { |
153 | } | 169 | ++$this->skippedEntries; |
154 | 170 | ||
155 | try { | 171 | return; |
156 | $entry = $this->contentProxy->updateEntry( | 172 | } |
157 | new Entry($this->user), | ||
158 | ($firefox) ? $importedEntry['uri'] : $importedEntry['url'] | ||
159 | ); | ||
160 | } catch (\Exception $e) { | ||
161 | $this->logger->warning('Error while saving '.($firefox) ? $importedEntry['uri'] : $importedEntry['url']); | ||
162 | ++$this->skippedEntries; | ||
163 | 173 | ||
164 | return; | 174 | $data = $this->prepareEntry($importedEntry); |
165 | } | ||
166 | 175 | ||
167 | $entry->setArchived($this->markAsRead); | 176 | $entry = new Entry($this->user); |
177 | $entry->setUrl($data['url']); | ||
178 | $entry->setTitle($data['title']); | ||
168 | 179 | ||
169 | $this->em->persist($entry); | 180 | // update entry with content (in case fetching failed, the given entry will be return) |
170 | ++$this->importedEntries; | 181 | $entry = $this->fetchContent($entry, $data['url'], $data); |
171 | 182 | ||
172 | // flush every 20 entries | 183 | if (array_key_exists('tags', $data)) { |
173 | if (($this->nbEntries % 20) === 0) { | 184 | $this->contentProxy->assignTagsToEntry( |
174 | $this->em->flush(); | 185 | $entry, |
175 | $this->em->clear($entry); | 186 | $data['tags'] |
176 | } | 187 | ); |
177 | ++$this->nbEntries; | ||
178 | } | 188 | } |
179 | } | ||
180 | 189 | ||
181 | /** | 190 | $entry->setArchived($data['is_archived']); |
182 | * Set whether articles must be all marked as read. | ||
183 | * | ||
184 | * @param bool $markAsRead | ||
185 | * | ||
186 | * @return $this | ||
187 | */ | ||
188 | public function setMarkAsRead($markAsRead) | ||
189 | { | ||
190 | $this->markAsRead = $markAsRead; | ||
191 | 191 | ||
192 | return $this; | 192 | if (!empty($data['created_at'])) { |
193 | } | 193 | $dt = new \DateTime(); |
194 | $entry->setCreatedAt($dt->setTimestamp($data['created_at']/1000)); | ||
195 | } | ||
194 | 196 | ||
195 | /** | 197 | $this->em->persist($entry); |
196 | * Set file path to the json file. | 198 | ++$this->importedEntries; |
197 | * | ||
198 | * @param string $filepath | ||
199 | * | ||
200 | * @return $this | ||
201 | */ | ||
202 | public function setFilepath($filepath) | ||
203 | { | ||
204 | $this->filepath = $filepath; | ||
205 | 199 | ||
206 | return $this; | 200 | return $entry; |
207 | } | 201 | } |
208 | 202 | ||
209 | /** | 203 | /** |
210 | * {@inheritdoc} | 204 | * {@inheritdoc} |
211 | */ | 205 | */ |
212 | public function getSummary() | 206 | protected function setEntryAsRead(array $importedEntry) |
213 | { | 207 | { |
214 | return [ | 208 | $importedEntry['is_archived'] = 1; |
215 | 'skipped' => $this->skippedEntries, | 209 | |
216 | 'imported' => $this->importedEntries, | 210 | return $importedEntry; |
217 | ]; | ||
218 | } | 211 | } |
219 | } | 212 | } |
diff --git a/src/Wallabag/ImportBundle/Import/ChromeImport.php b/src/Wallabag/ImportBundle/Import/ChromeImport.php new file mode 100644 index 00000000..7936ee2f --- /dev/null +++ b/src/Wallabag/ImportBundle/Import/ChromeImport.php | |||
@@ -0,0 +1,71 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Wallabag\ImportBundle\Import; | ||
4 | |||
5 | use Psr\Log\LoggerInterface; | ||
6 | use Psr\Log\NullLogger; | ||
7 | use Doctrine\ORM\EntityManager; | ||
8 | use Wallabag\CoreBundle\Entity\Entry; | ||
9 | use Wallabag\UserBundle\Entity\User; | ||
10 | use Wallabag\CoreBundle\Helper\ContentProxy; | ||
11 | |||
12 | class ChromeImport extends BrowserImport | ||
13 | { | ||
14 | protected $filepath; | ||
15 | |||
16 | /** | ||
17 | * {@inheritdoc} | ||
18 | */ | ||
19 | public function getName() | ||
20 | { | ||
21 | return 'Chrome'; | ||
22 | } | ||
23 | |||
24 | /** | ||
25 | * {@inheritdoc} | ||
26 | */ | ||
27 | public function getUrl() | ||
28 | { | ||
29 | return 'import_chrome'; | ||
30 | } | ||
31 | |||
32 | /** | ||
33 | * {@inheritdoc} | ||
34 | */ | ||
35 | public function getDescription() | ||
36 | { | ||
37 | return 'import.chrome.description'; | ||
38 | } | ||
39 | |||
40 | /** | ||
41 | * {@inheritdoc} | ||
42 | */ | ||
43 | protected function prepareEntry($entry = []) | ||
44 | { | ||
45 | $data = [ | ||
46 | 'title' => $entry['name'], | ||
47 | 'html' => '', | ||
48 | 'url' => $entry['url'], | ||
49 | 'is_archived' => $this->markAsRead, | ||
50 | 'tags' => '', | ||
51 | 'created_at' => $entry['date_added'], | ||
52 | ]; | ||
53 | |||
54 | if (array_key_exists('tags', $entry) && $entry['tags'] != '') { | ||
55 | $data['tags'] = $entry['tags']; | ||
56 | } | ||
57 | |||
58 | return $data; | ||
59 | } | ||
60 | |||
61 | |||
62 | /** | ||
63 | * {@inheritdoc} | ||
64 | */ | ||
65 | protected function setEntryAsRead(array $importedEntry) | ||
66 | { | ||
67 | $importedEntry['is_archived'] = 1; | ||
68 | |||
69 | return $importedEntry; | ||
70 | } | ||
71 | } | ||
diff --git a/src/Wallabag/ImportBundle/Import/FirefoxImport.php b/src/Wallabag/ImportBundle/Import/FirefoxImport.php new file mode 100644 index 00000000..cbf10b87 --- /dev/null +++ b/src/Wallabag/ImportBundle/Import/FirefoxImport.php | |||
@@ -0,0 +1,71 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Wallabag\ImportBundle\Import; | ||
4 | |||
5 | use Psr\Log\LoggerInterface; | ||
6 | use Psr\Log\NullLogger; | ||
7 | use Doctrine\ORM\EntityManager; | ||
8 | use Wallabag\CoreBundle\Entity\Entry; | ||
9 | use Wallabag\UserBundle\Entity\User; | ||
10 | use Wallabag\CoreBundle\Helper\ContentProxy; | ||
11 | |||
12 | class FirefoxImport extends BrowserImport | ||
13 | { | ||
14 | protected $filepath; | ||
15 | |||
16 | /** | ||
17 | * {@inheritdoc} | ||
18 | */ | ||
19 | public function getName() | ||
20 | { | ||
21 | return 'Firefox'; | ||
22 | } | ||
23 | |||
24 | /** | ||
25 | * {@inheritdoc} | ||
26 | */ | ||
27 | public function getUrl() | ||
28 | { | ||
29 | return 'import_firefox'; | ||
30 | } | ||
31 | |||
32 | /** | ||
33 | * {@inheritdoc} | ||
34 | */ | ||
35 | public function getDescription() | ||
36 | { | ||
37 | return 'import.firefox.description'; | ||
38 | } | ||
39 | |||
40 | /** | ||
41 | * {@inheritdoc} | ||
42 | */ | ||
43 | protected function prepareEntry($entry = []) | ||
44 | { | ||
45 | $data = [ | ||
46 | 'title' => $entry['name'], | ||
47 | 'html' => '', | ||
48 | 'url' => $entry['url'], | ||
49 | 'is_archived' => $this->markAsRead, | ||
50 | 'tags' => '', | ||
51 | 'created_at' => $entry['date_added'], | ||
52 | ]; | ||
53 | |||
54 | if (array_key_exists('tags', $entry) && $entry['tags'] != '') { | ||
55 | $data['tags'] = $entry['tags']; | ||
56 | } | ||
57 | |||
58 | return $data; | ||
59 | } | ||
60 | |||
61 | |||
62 | /** | ||
63 | * {@inheritdoc} | ||
64 | */ | ||
65 | protected function setEntryAsRead(array $importedEntry) | ||
66 | { | ||
67 | $importedEntry['is_archived'] = 1; | ||
68 | |||
69 | return $importedEntry; | ||
70 | } | ||
71 | } | ||