aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Wallabag/ImportBundle/Import
diff options
context:
space:
mode:
authorThomas Citharel <tcit@tcit.fr>2016-09-21 17:47:47 +0200
committerJeremy Benoist <jeremy.benoist@gmail.com>2016-09-25 12:29:18 +0200
commit59201088b4fc13fd361238396f630dabd9bd1990 (patch)
tree2d4d5c2fbe7f007214c41f0c4ccba2f8d3d7ec8b /src/Wallabag/ImportBundle/Import
parentf7c55b38122cc593c2b58bb6425fca9d243b055e (diff)
downloadwallabag-59201088b4fc13fd361238396f630dabd9bd1990.tar.gz
wallabag-59201088b4fc13fd361238396f630dabd9bd1990.tar.zst
wallabag-59201088b4fc13fd361238396f630dabd9bd1990.zip
bring chrome and firefox as separate imports
Diffstat (limited to 'src/Wallabag/ImportBundle/Import')
-rw-r--r--src/Wallabag/ImportBundle/Import/BrowserImport.php247
-rw-r--r--src/Wallabag/ImportBundle/Import/ChromeImport.php71
-rw-r--r--src/Wallabag/ImportBundle/Import/FirefoxImport.php71
3 files changed, 262 insertions, 127 deletions
diff --git a/src/Wallabag/ImportBundle/Import/BrowserImport.php b/src/Wallabag/ImportBundle/Import/BrowserImport.php
index e3457196..ef7d6d95 100644
--- a/src/Wallabag/ImportBundle/Import/BrowserImport.php
+++ b/src/Wallabag/ImportBundle/Import/BrowserImport.php
@@ -9,69 +9,24 @@ use Wallabag\CoreBundle\Entity\Entry;
9use Wallabag\UserBundle\Entity\User; 9use Wallabag\UserBundle\Entity\User;
10use Wallabag\CoreBundle\Helper\ContentProxy; 10use Wallabag\CoreBundle\Helper\ContentProxy;
11 11
12class BrowserImport implements ImportInterface 12abstract class BrowserImport extends AbstractImport
13{ 13{
14 protected $user;
15 protected $em;
16 protected $logger;
17 protected $contentProxy;
18 protected $skippedEntries = 0;
19 protected $importedEntries = 0;
20 protected $totalEntries = 0;
21 protected $filepath; 14 protected $filepath;
22 protected $markAsRead;
23 private $nbEntries;
24
25 public function __construct(EntityManager $em, ContentProxy $contentProxy)
26 {
27 $this->em = $em;
28 $this->logger = new NullLogger();
29 $this->contentProxy = $contentProxy;
30 }
31
32 public function setLogger(LoggerInterface $logger)
33 {
34 $this->logger = $logger;
35 }
36
37 /**
38 * We define the user in a custom call because on the import command there is no logged in user.
39 * So we can't retrieve user from the `security.token_storage` service.
40 *
41 * @param User $user
42 *
43 * @return $this
44 */
45 public function setUser(User $user)
46 {
47 $this->user = $user;
48
49 return $this;
50 }
51 15
52 /** 16 /**
53 * {@inheritdoc} 17 * {@inheritdoc}
54 */ 18 */
55 public function getName() 19 abstract public function getName();
56 {
57 return 'Firefox & Google Chrome';
58 }
59 20
60 /** 21 /**
61 * {@inheritdoc} 22 * {@inheritdoc}
62 */ 23 */
63 public function getUrl() 24 abstract public function getUrl();
64 {
65 return 'import_browser';
66 }
67 25
68 /** 26 /**
69 * {@inheritdoc} 27 * {@inheritdoc}
70 */ 28 */
71 public function getDescription() 29 abstract public function getDescription();
72 {
73 return 'import.browser.description';
74 }
75 30
76 /** 31 /**
77 * {@inheritdoc} 32 * {@inheritdoc}
@@ -96,124 +51,162 @@ class BrowserImport implements ImportInterface
96 return false; 51 return false;
97 } 52 }
98 53
99 $this->nbEntries = 1; 54 if ($this->producer) {
55 $this->parseEntriesForProducer($data);
56
57 return true;
58 }
59
100 $this->parseEntries($data); 60 $this->parseEntries($data);
101 $this->em->flush();
102 61
103 return true; 62 return true;
104 } 63 }
105 64
106 private function parseEntries($data) 65 /**
66 * Set file path to the json file.
67 *
68 * @param string $filepath
69 */
70 public function setFilepath($filepath)
71 {
72 $this->filepath = $filepath;
73
74 return $this;
75 }
76
77 /**
78 * Parse and insert all given entries.
79 *
80 * @param $entries
81 */
82 protected function parseEntries($entries)
107 { 83 {
108 foreach ($data as $importedEntry) { 84 $i = 1;
109 $this->parseEntry($importedEntry); 85
86 foreach ($entries as $importedEntry) {
87 if ((array) $importedEntry !== $importedEntry) {
88 continue;
89 }
90
91 $entry = $this->parseEntry($importedEntry);
92
93 if (null === $entry) {
94 continue;
95 }
96
97 // flush every 20 entries
98 if (($i % 20) === 0) {
99 $this->em->flush();
100
101 // clear only affected entities
102 $this->em->clear(Entry::class);
103 $this->em->clear(Tag::class);
104 }
105 ++$i;
110 } 106 }
111 $this->totalEntries += count($data); 107
108 $this->em->flush();
112 } 109 }
113 110
114 private function parseEntry($importedEntry) 111 /**
112 * Parse entries and send them to the queue.
113 * It should just be a simple loop on all item, no call to the database should be done
114 * to speedup queuing.
115 *
116 * Faster parse entries for Producer.
117 * We don't care to make check at this time. They'll be done by the consumer.
118 *
119 * @param array $entries
120 */
121 protected function parseEntriesForProducer(array $entries)
115 { 122 {
116 if (!is_array($importedEntry)) { 123 foreach ($entries as $importedEntry) {
117 return; 124
125 if ((array) $importedEntry !== $importedEntry) {
126 continue;
127 }
128
129 // set userId for the producer (it won't know which user is connected)
130 $importedEntry['userId'] = $this->user->getId();
131
132 if ($this->markAsRead) {
133 $importedEntry = $this->setEntryAsRead($importedEntry);
134 }
135
136 ++$this->queuedEntries;
137
138 $this->producer->publish(json_encode($importedEntry));
118 } 139 }
140 }
119 141
120 /* Firefox uses guid while Chrome uses id */ 142 /**
143 * {@inheritdoc}
144 */
145 public function parseEntry(array $importedEntry)
146 {
121 147
122 if ((!key_exists('guid', $importedEntry) || (!key_exists('id', $importedEntry))) && is_array(reset($importedEntry))) { 148 if ((!key_exists('guid', $importedEntry) || (!key_exists('id', $importedEntry))) && is_array(reset($importedEntry))) {
123 $this->parseEntries($importedEntry); 149 $this->parseEntries($importedEntry);
124
125 return; 150 return;
126 } 151 }
152
127 if (key_exists('children', $importedEntry)) { 153 if (key_exists('children', $importedEntry)) {
128 $this->parseEntries($importedEntry['children']); 154 $this->parseEntries($importedEntry['children']);
129
130 return; 155 return;
131 } 156 }
132 if (key_exists('uri', $importedEntry) || key_exists('url', $importedEntry)) {
133
134 /* Firefox uses uri while Chrome uses url */
135 157
136 $firefox = key_exists('uri', $importedEntry); 158 if (!key_exists('uri', $importedEntry) && !key_exists('url', $importedEntry)) {
137 159 return;
138 $existingEntry = $this->em 160 }
139 ->getRepository('WallabagCoreBundle:Entry')
140 ->findByUrlAndUserId(($firefox) ? $importedEntry['uri'] : $importedEntry['url'], $this->user->getId());
141
142 if (false !== $existingEntry) {
143 ++$this->skippedEntries;
144 161
145 return; 162 $firefox = key_exists('uri', $importedEntry);
146 }
147 163
148 if (false === parse_url(($firefox) ? $importedEntry['uri'] : $importedEntry['url']) || false === filter_var(($firefox) ? $importedEntry['uri'] : $importedEntry['url'], FILTER_VALIDATE_URL)) { 164 $existingEntry = $this->em
149 $this->logger->warning('Imported URL '.($firefox) ? $importedEntry['uri'] : $importedEntry['url'].' is not valid'); 165 ->getRepository('WallabagCoreBundle:Entry')
150 ++$this->skippedEntries; 166 ->findByUrlAndUserId(($firefox) ? $importedEntry['uri'] : $importedEntry['url'], $this->user->getId());
151 167
152 return; 168 if (false !== $existingEntry) {
153 } 169 ++$this->skippedEntries;
154 170
155 try { 171 return;
156 $entry = $this->contentProxy->updateEntry( 172 }
157 new Entry($this->user),
158 ($firefox) ? $importedEntry['uri'] : $importedEntry['url']
159 );
160 } catch (\Exception $e) {
161 $this->logger->warning('Error while saving '.($firefox) ? $importedEntry['uri'] : $importedEntry['url']);
162 ++$this->skippedEntries;
163 173
164 return; 174 $data = $this->prepareEntry($importedEntry);
165 }
166 175
167 $entry->setArchived($this->markAsRead); 176 $entry = new Entry($this->user);
177 $entry->setUrl($data['url']);
178 $entry->setTitle($data['title']);
168 179
169 $this->em->persist($entry); 180 // update entry with content (in case fetching failed, the given entry will be return)
170 ++$this->importedEntries; 181 $entry = $this->fetchContent($entry, $data['url'], $data);
171 182
172 // flush every 20 entries 183 if (array_key_exists('tags', $data)) {
173 if (($this->nbEntries % 20) === 0) { 184 $this->contentProxy->assignTagsToEntry(
174 $this->em->flush(); 185 $entry,
175 $this->em->clear($entry); 186 $data['tags']
176 } 187 );
177 ++$this->nbEntries;
178 } 188 }
179 }
180 189
181 /** 190 $entry->setArchived($data['is_archived']);
182 * Set whether articles must be all marked as read.
183 *
184 * @param bool $markAsRead
185 *
186 * @return $this
187 */
188 public function setMarkAsRead($markAsRead)
189 {
190 $this->markAsRead = $markAsRead;
191 191
192 return $this; 192 if (!empty($data['created_at'])) {
193 } 193 $dt = new \DateTime();
194 $entry->setCreatedAt($dt->setTimestamp($data['created_at']/1000));
195 }
194 196
195 /** 197 $this->em->persist($entry);
196 * Set file path to the json file. 198 ++$this->importedEntries;
197 *
198 * @param string $filepath
199 *
200 * @return $this
201 */
202 public function setFilepath($filepath)
203 {
204 $this->filepath = $filepath;
205 199
206 return $this; 200 return $entry;
207 } 201 }
208 202
209 /** 203 /**
210 * {@inheritdoc} 204 * {@inheritdoc}
211 */ 205 */
212 public function getSummary() 206 protected function setEntryAsRead(array $importedEntry)
213 { 207 {
214 return [ 208 $importedEntry['is_archived'] = 1;
215 'skipped' => $this->skippedEntries, 209
216 'imported' => $this->importedEntries, 210 return $importedEntry;
217 ];
218 } 211 }
219} 212}
diff --git a/src/Wallabag/ImportBundle/Import/ChromeImport.php b/src/Wallabag/ImportBundle/Import/ChromeImport.php
new file mode 100644
index 00000000..7936ee2f
--- /dev/null
+++ b/src/Wallabag/ImportBundle/Import/ChromeImport.php
@@ -0,0 +1,71 @@
1<?php
2
3namespace Wallabag\ImportBundle\Import;
4
5use Psr\Log\LoggerInterface;
6use Psr\Log\NullLogger;
7use Doctrine\ORM\EntityManager;
8use Wallabag\CoreBundle\Entity\Entry;
9use Wallabag\UserBundle\Entity\User;
10use Wallabag\CoreBundle\Helper\ContentProxy;
11
/**
 * Importer for bookmarks exported by Google Chrome.
 *
 * Provides the Chrome-specific name, url and description, and maps a raw
 * Chrome bookmark onto the normalized array that
 * BrowserImport::parseEntry() consumes.
 */
class ChromeImport extends BrowserImport
{
    /**
     * Seconds between Chrome's bookmark epoch (1601-01-01 UTC) and the
     * Unix epoch (1970-01-01 UTC).
     */
    const CHROME_EPOCH_OFFSET = 11644473600;

    // Same property as the one declared on BrowserImport; kept for compatibility.
    protected $filepath;

    /**
     * {@inheritdoc}
     */
    public function getName()
    {
        return 'Chrome';
    }

    /**
     * {@inheritdoc}
     */
    public function getUrl()
    {
        return 'import_chrome';
    }

    /**
     * {@inheritdoc}
     */
    public function getDescription()
    {
        return 'import.chrome.description';
    }

    /**
     * Map a raw Chrome bookmark onto the normalized entry array.
     *
     * Missing keys no longer raise an "undefined index" notice: an absent
     * `name` or `url` yields null and an absent/invalid `date_added` yields
     * a null `created_at`, which the caller treats as "no date".
     *
     * @param array $entry Raw bookmark node (`name`, `url`, `date_added`, optional `tags`)
     *
     * @return array Keys: title, html, url, is_archived, tags, created_at
     */
    protected function prepareEntry($entry = [])
    {
        $data = [
            'title' => array_key_exists('name', $entry) ? $entry['name'] : null,
            'html' => '',
            'url' => array_key_exists('url', $entry) ? $entry['url'] : null,
            'is_archived' => $this->markAsRead,
            'tags' => '',
            'created_at' => $this->chromeDateToMilliseconds(
                array_key_exists('date_added', $entry) ? $entry['date_added'] : null
            ),
        ];

        if (array_key_exists('tags', $entry) && $entry['tags'] != '') {
            $data['tags'] = $entry['tags'];
        }

        return $data;
    }

    /**
     * Flag a raw entry as archived.
     *
     * @param array $importedEntry
     *
     * @return array The entry with `is_archived` set to 1
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        $importedEntry['is_archived'] = 1;

        return $importedEntry;
    }

    /**
     * Convert Chrome's `date_added` into Unix milliseconds.
     *
     * Chrome stores the value as microseconds since 1601-01-01 UTC, while
     * BrowserImport::parseEntry() divides `created_at` by 1000 before using
     * it as a Unix timestamp, so the value handed over must be expressed in
     * milliseconds since the Unix epoch.
     *
     * @param mixed $dateAdded Raw `date_added` value (numeric string or number)
     *
     * @return int|float|null Whole-second Unix time in milliseconds, or null when unusable
     */
    private function chromeDateToMilliseconds($dateAdded)
    {
        // Chrome writes "0" for an unknown date; treat it like a missing value.
        if (!is_numeric($dateAdded) || $dateAdded <= 0) {
            return null;
        }

        $unixSeconds = (int) floor($dateAdded / 1000000) - self::CHROME_EPOCH_OFFSET;

        // A result before 1970 means the input was not a Chrome timestamp.
        if ($unixSeconds <= 0) {
            return null;
        }

        // Multiple of 1000 so the caller's division yields an exact integer.
        return $unixSeconds * 1000;
    }
}
diff --git a/src/Wallabag/ImportBundle/Import/FirefoxImport.php b/src/Wallabag/ImportBundle/Import/FirefoxImport.php
new file mode 100644
index 00000000..cbf10b87
--- /dev/null
+++ b/src/Wallabag/ImportBundle/Import/FirefoxImport.php
@@ -0,0 +1,71 @@
1<?php
2
3namespace Wallabag\ImportBundle\Import;
4
5use Psr\Log\LoggerInterface;
6use Psr\Log\NullLogger;
7use Doctrine\ORM\EntityManager;
8use Wallabag\CoreBundle\Entity\Entry;
9use Wallabag\UserBundle\Entity\User;
10use Wallabag\CoreBundle\Helper\ContentProxy;
11
/**
 * Importer for bookmarks exported by Mozilla Firefox.
 *
 * Provides the Firefox-specific name, url and description, and maps a raw
 * Firefox bookmark onto the normalized array that
 * BrowserImport::parseEntry() consumes.
 */
class FirefoxImport extends BrowserImport
{
    // Same property as the one declared on BrowserImport; kept for compatibility.
    protected $filepath;

    /**
     * {@inheritdoc}
     */
    public function getName()
    {
        return 'Firefox';
    }

    /**
     * {@inheritdoc}
     */
    public function getUrl()
    {
        return 'import_firefox';
    }

    /**
     * {@inheritdoc}
     */
    public function getDescription()
    {
        return 'import.firefox.description';
    }

    /**
     * Map a raw Firefox bookmark onto the normalized entry array.
     *
     * Firefox names its fields `title`, `uri` and `dateAdded`; the previous
     * mapping read Chrome's `name`, `url` and `date_added`, so Firefox
     * entries ended up without url, title or date. The Chrome-style `name`
     * and `url` keys are still accepted as a fallback because
     * BrowserImport::parseEntry() lets entries with either `uri` or `url`
     * through.
     *
     * @param array $entry Raw bookmark node (`title`, `uri`, `dateAdded`, optional `tags`)
     *
     * @return array Keys: title, html, url, is_archived, tags, created_at
     */
    protected function prepareEntry($entry = [])
    {
        $data = [
            'title' => $this->firstExistingValue($entry, ['title', 'name']),
            'html' => '',
            'url' => $this->firstExistingValue($entry, ['uri', 'url']),
            'is_archived' => $this->markAsRead,
            'tags' => '',
            'created_at' => $this->firefoxDateToMilliseconds(
                $this->firstExistingValue($entry, ['dateAdded'])
            ),
        ];

        if (array_key_exists('tags', $entry) && $entry['tags'] != '') {
            $data['tags'] = $entry['tags'];
        }

        return $data;
    }

    /**
     * Flag a raw entry as archived.
     *
     * @param array $importedEntry
     *
     * @return array The entry with `is_archived` set to 1
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        $importedEntry['is_archived'] = 1;

        return $importedEntry;
    }

    /**
     * Return the value stored under the first key of $keys present in $entry.
     *
     * @param array $entry
     * @param array $keys  Candidate keys, in order of preference
     *
     * @return mixed|null The matching value, or null when none of the keys exist
     */
    private function firstExistingValue(array $entry, array $keys)
    {
        foreach ($keys as $key) {
            if (array_key_exists($key, $entry)) {
                return $entry[$key];
            }
        }

        return null;
    }

    /**
     * Convert Firefox's `dateAdded` into Unix milliseconds.
     *
     * Firefox stores the value as microseconds since the Unix epoch, while
     * BrowserImport::parseEntry() divides `created_at` by 1000 before using
     * it as a Unix timestamp, so the value handed over must be expressed in
     * milliseconds.
     *
     * @param mixed $dateAdded Raw `dateAdded` value (number or numeric string)
     *
     * @return int|float|null Whole-second Unix time in milliseconds, or null when unusable
     */
    private function firefoxDateToMilliseconds($dateAdded)
    {
        if (!is_numeric($dateAdded) || $dateAdded <= 0) {
            return null;
        }

        // Multiple of 1000 so the caller's division yields an exact integer.
        return (int) floor($dateAdded / 1000000) * 1000;
    }
}