]> git.immae.eu Git - github/wallabag/wallabag.git/blob - src/Wallabag/ImportBundle/Import/AbstractImport.php
Add disableContentUpdate import option
[github/wallabag/wallabag.git] / src / Wallabag / ImportBundle / Import / AbstractImport.php
1 <?php
2
3 namespace Wallabag\ImportBundle\Import;
4
5 use Psr\Log\LoggerInterface;
6 use Psr\Log\NullLogger;
7 use Doctrine\ORM\EntityManager;
8 use Wallabag\CoreBundle\Helper\ContentProxy;
9 use Wallabag\CoreBundle\Entity\Entry;
10 use Wallabag\CoreBundle\Entity\Tag;
11 use Wallabag\CoreBundle\Helper\TagsAssigner;
12 use Wallabag\UserBundle\Entity\User;
13 use OldSound\RabbitMqBundle\RabbitMq\ProducerInterface;
14 use Symfony\Component\EventDispatcher\EventDispatcherInterface;
15 use Wallabag\CoreBundle\Event\EntrySavedEvent;
16
/**
 * Base class shared by the importers.
 *
 * It holds the collaborators needed to import entries (entity manager, content proxy,
 * tags assigner, event dispatcher, optional queue producer), the import options
 * (mark as read, disable content update) and the counters exposed by getSummary().
 */
abstract class AbstractImport implements ImportInterface
{
    /** @var EntityManager */
    protected $em;

    /** @var LoggerInterface Defaults to a NullLogger until setLogger() is called */
    protected $logger;

    /** @var ContentProxy */
    protected $contentProxy;

    /** @var TagsAssigner */
    protected $tagsAssigner;

    /** @var EventDispatcherInterface */
    protected $eventDispatcher;

    /** @var ProducerInterface|null Only set through setProducer() */
    protected $producer;

    /** @var User|null Only set through setUser() */
    protected $user;

    /** @var bool|null Only set through setMarkAsRead() */
    protected $markAsRead;

    /** @var bool|null Only set through setDisableContentUpdate() */
    protected $disableContentUpdate;

    // Counters returned by getSummary(). Only $queuedEntries is updated in this class;
    // the two others are presumably maintained by the concrete importers.
    protected $skippedEntries = 0;
    protected $importedEntries = 0;
    protected $queuedEntries = 0;

    public function __construct(EntityManager $em, ContentProxy $contentProxy, TagsAssigner $tagsAssigner, EventDispatcherInterface $eventDispatcher)
    {
        $this->em = $em;
        $this->logger = new NullLogger();
        $this->contentProxy = $contentProxy;
        $this->tagsAssigner = $tagsAssigner;
        $this->eventDispatcher = $eventDispatcher;
    }

    /**
     * Replace the default NullLogger.
     *
     * @param LoggerInterface $logger
     */
    public function setLogger(LoggerInterface $logger)
    {
        $this->logger = $logger;
    }

    /**
     * Set RabbitMQ/Redis Producer to send each entry to a queue.
     * This method should be called when user has enabled RabbitMQ.
     *
     * @param ProducerInterface $producer
     */
    public function setProducer(ProducerInterface $producer)
    {
        $this->producer = $producer;
    }

    /**
     * Set current user.
     * Could be the current *connected* user or one retrieved by the consumer.
     *
     * @param User $user
     */
    public function setUser(User $user)
    {
        $this->user = $user;
    }

    /**
     * Set whether articles must be all marked as read.
     *
     * @param bool $markAsRead
     *
     * @return $this
     */
    public function setMarkAsRead($markAsRead)
    {
        $this->markAsRead = $markAsRead;

        return $this;
    }

    /**
     * Get whether articles must be all marked as read.
     */
    public function getMarkAsRead()
    {
        return $this->markAsRead;
    }

    /**
     * Set whether the content update must be disabled.
     * The value is forwarded as-is to ContentProxy::importEntry() by fetchContent().
     *
     * @param bool $disableContentUpdate
     *
     * @return $this
     */
    public function setDisableContentUpdate($disableContentUpdate)
    {
        $this->disableContentUpdate = $disableContentUpdate;

        return $this;
    }

    /**
     * Get whether the content update must be disabled.
     */
    public function getDisableContentUpdate()
    {
        return $this->disableContentUpdate;
    }

    /**
     * Fetch content from the ContentProxy (using graby).
     * If it fails, the error is logged and not rethrown, so the given entry can still
     * be saved by the caller (to avoid the user losing the content).
     *
     * @param Entry  $entry   Entry to update
     * @param string $url     Url to grab content for
     * @param array  $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url
     */
    protected function fetchContent(Entry $entry, $url, array $content = [])
    {
        try {
            $this->contentProxy->importEntry($entry, $content, $this->disableContentUpdate);
        } catch (\Exception $e) {
            // $content defaults to an empty array, so its 'url' key may be missing:
            // fall back to the $url argument instead of triggering an undefined index
            // notice while reporting the failure.
            $this->logger->error('Error trying to import an entry.', [
                'entry_url' => isset($content['url']) ? $content['url'] : $url,
                'error_msg' => $e->getMessage(),
            ]);
        }
    }

    /**
     * Parse and insert all given entries.
     *
     * Entries are flushed by batch of 20; after each flush an entry.saved event is
     * dispatched for every entry of the batch, then Entry and Tag entities are cleared
     * from the entity manager.
     *
     * @param array|\Traversable $entries Imported entries, each one is given to parseEntry()
     */
    protected function parseEntries($entries)
    {
        $i = 1;
        $entryToBeFlushed = [];

        foreach ($entries as $importedEntry) {
            if ($this->markAsRead) {
                $importedEntry = $this->setEntryAsRead($importedEntry);
            }

            $entry = $this->parseEntry($importedEntry);

            // parseEntry() gave nothing to save: it doesn't count toward the batch size
            if (null === $entry) {
                continue;
            }

            // store each entry to be flushed so we can trigger the entry.saved event for each of them
            // entry.saved needs the entry to be persisted in db because it needs its id to generate
            // images (at least)
            $entryToBeFlushed[] = $entry;

            // flush every 20 entries
            if (($i % 20) === 0) {
                $this->em->flush();

                $this->dispatchEntrySavedEvents($entryToBeFlushed);
                $entryToBeFlushed = [];

                // clear only affected entities
                $this->em->clear(Entry::class);
                $this->em->clear(Tag::class);
            }
            ++$i;
        }

        // flush the last (incomplete) batch and notify about its entries
        $this->em->flush();

        $this->dispatchEntrySavedEvents($entryToBeFlushed);
    }

    /**
     * Parse entries and send them to the queue.
     * It should just be a simple loop on all item, no call to the database should be done
     * to speedup queuing.
     *
     * Faster parse entries for Producer.
     * We don't care to make check at this time. They'll be done by the consumer.
     *
     * @param array $entries
     */
    protected function parseEntriesForProducer(array $entries)
    {
        foreach ($entries as $importedEntry) {
            // set userId for the producer (it won't know which user is connected)
            $importedEntry['userId'] = $this->user->getId();

            if ($this->markAsRead) {
                $importedEntry = $this->setEntryAsRead($importedEntry);
            }

            ++$this->queuedEntries;

            $this->producer->publish(json_encode($importedEntry));
        }
    }

    /**
     * {@inheritdoc}
     */
    public function getSummary()
    {
        return [
            'skipped' => $this->skippedEntries,
            'imported' => $this->importedEntries,
            'queued' => $this->queuedEntries,
        ];
    }

    /**
     * Parse one entry.
     *
     * @param array $importedEntry
     *
     * @return Entry|null parseEntries() ignores the entry when null is returned
     */
    abstract public function parseEntry(array $importedEntry);

    /**
     * Set current imported entry to archived / read.
     * Implementation is different across all imports.
     *
     * @param array $importedEntry
     *
     * @return array
     */
    abstract protected function setEntryAsRead(array $importedEntry);

    /**
     * Dispatch the entry.saved event for each given entry.
     * Must be called after the flush: listeners need the entries to be persisted.
     *
     * @param Entry[] $entries Entries that have just been flushed (may be empty)
     */
    private function dispatchEntrySavedEvents(array $entries)
    {
        foreach ($entries as $entry) {
            $this->eventDispatcher->dispatch(EntrySavedEvent::NAME, new EntrySavedEvent($entry));
        }
    }
}