]> git.immae.eu Git - github/wallabag/wallabag.git/blob - src/Wallabag/ImportBundle/Import/AbstractImport.php
Merge pull request #4438 from wallabag/dependabot/composer/scheb/two-factor-bundle...
[github/wallabag/wallabag.git] / src / Wallabag / ImportBundle / Import / AbstractImport.php
1 <?php
2
3 namespace Wallabag\ImportBundle\Import;
4
5 use Doctrine\ORM\EntityManager;
6 use OldSound\RabbitMqBundle\RabbitMq\ProducerInterface;
7 use Psr\Log\LoggerInterface;
8 use Psr\Log\NullLogger;
9 use Symfony\Component\EventDispatcher\EventDispatcherInterface;
10 use Wallabag\CoreBundle\Entity\Entry;
11 use Wallabag\CoreBundle\Entity\Tag;
12 use Wallabag\CoreBundle\Event\EntrySavedEvent;
13 use Wallabag\CoreBundle\Helper\ContentProxy;
14 use Wallabag\CoreBundle\Helper\TagsAssigner;
15 use Wallabag\UserBundle\Entity\User;
16
/**
 * Shared base for importers.
 *
 * Holds the collaborators and counters common to every import and provides the
 * two processing loops: direct insertion through the entity manager
 * (parseEntries) and publication to a queue (parseEntriesForProducer).
 * Concrete importers supply parseEntry(), validateEntry() and setEntryAsRead().
 */
abstract class AbstractImport implements ImportInterface
{
    protected $em;
    protected $logger;
    protected $contentProxy;
    protected $tagsAssigner;
    protected $eventDispatcher;
    protected $producer;
    protected $user;
    protected $markAsRead;
    protected $disableContentUpdate = false;
    protected $skippedEntries = 0;
    protected $importedEntries = 0;
    protected $queuedEntries = 0;

    /**
     * Store the collaborators. The logger defaults to a NullLogger until
     * setLogger() is called.
     */
    public function __construct(EntityManager $em, ContentProxy $contentProxy, TagsAssigner $tagsAssigner, EventDispatcherInterface $eventDispatcher)
    {
        $this->em = $em;
        $this->logger = new NullLogger();
        $this->contentProxy = $contentProxy;
        $this->tagsAssigner = $tagsAssigner;
        $this->eventDispatcher = $eventDispatcher;
    }

    /**
     * Replace the default NullLogger with the given logger.
     */
    public function setLogger(LoggerInterface $logger)
    {
        $this->logger = $logger;
    }

    /**
     * Set the RabbitMQ/Redis producer used to send each entry to a queue.
     * This method should be called when the user has enabled RabbitMQ.
     */
    public function setProducer(ProducerInterface $producer)
    {
        $this->producer = $producer;
    }

    /**
     * Set the current user.
     * It can be the currently *connected* user or one retrieved by the consumer.
     */
    public function setUser(User $user)
    {
        $this->user = $user;
    }

    /**
     * Set whether all articles must be marked as read.
     *
     * @param bool $markAsRead
     *
     * @return $this
     */
    public function setMarkAsRead($markAsRead)
    {
        $this->markAsRead = $markAsRead;

        return $this;
    }

    /**
     * Get whether all articles must be marked as read.
     *
     * @return bool|null The value given to setMarkAsRead(), or null when it was never called
     */
    public function getMarkAsRead()
    {
        return $this->markAsRead;
    }

    /**
     * Set whether articles should be fetched for updated content.
     *
     * @param bool $disableContentUpdate
     *
     * @return $this
     */
    public function setDisableContentUpdate($disableContentUpdate)
    {
        $this->disableContentUpdate = $disableContentUpdate;

        return $this;
    }

    /**
     * {@inheritdoc}
     */
    public function getSummary()
    {
        return [
            'skipped' => $this->skippedEntries,
            'imported' => $this->importedEntries,
            'queued' => $this->queuedEntries,
        ];
    }

    /**
     * Parse one entry.
     *
     * @return Entry
     */
    abstract public function parseEntry(array $importedEntry);

    /**
     * Validate that an entry is valid (e.g. it has some required keys).
     *
     * @return bool
     */
    abstract public function validateEntry(array $importedEntry);

    /**
     * Fetch content through the ContentProxy (using graby).
     * Any exception is caught and logged instead of being propagated, so the
     * caller can still save the entry and the user does not lose it.
     *
     * @param Entry  $entry   Entry to update
     * @param string $url     Url to grab content for
     * @param array  $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url
     */
    protected function fetchContent(Entry $entry, $url, array $content = [])
    {
        try {
            $this->contentProxy->updateEntry($entry, $url, $content, $this->disableContentUpdate);
        } catch (\Exception $e) {
            $this->logger->error('Error trying to import an entry.', [
                'entry_url' => $url,
                'error_msg' => $e->getMessage(),
            ]);
        }
    }

    /**
     * Parse and insert all given entries.
     *
     * Entries failing validateEntry(), or for which parseEntry() returns null,
     * are ignored. The entity manager is flushed every 20 kept entries and once
     * more at the end; entry.saved is dispatched for each entry after the flush
     * that covers it.
     */
    protected function parseEntries(array $entries)
    {
        $keptCount = 0;
        $pendingEntries = [];

        foreach ($entries as $importedEntry) {
            if ($this->markAsRead) {
                $importedEntry = $this->setEntryAsRead($importedEntry);
            }

            if (false === $this->validateEntry($importedEntry)) {
                continue;
            }

            $entry = $this->parseEntry($importedEntry);

            if (null === $entry) {
                continue;
            }

            // keep each entry until the next flush so entry.saved can be triggered for it:
            // entry.saved needs the entry to be persisted in db because it needs its id
            // to generate images (at least)
            $pendingEntries[] = $entry;
            ++$keptCount;

            // flush every 20 entries
            if (0 !== ($keptCount % 20)) {
                continue;
            }

            $this->em->flush();
            $this->dispatchSavedEvents($pendingEntries);
            $pendingEntries = [];

            // clear only affected entities
            $this->em->clear(Entry::class);
            $this->em->clear(Tag::class);
        }

        // flush the last, incomplete batch
        $this->em->flush();
        $this->dispatchSavedEvents($pendingEntries);
    }

    /**
     * Parse entries and send them to the queue.
     * It should just be a simple loop over all items; no call to the database
     * should be done, to speed up queuing.
     *
     * No validation is done at this time: it will be done by the consumer.
     * An entry that cannot be JSON-encoded is logged, counted as skipped and not
     * published, instead of sending an unusable message to the queue.
     */
    protected function parseEntriesForProducer(array $entries)
    {
        foreach ($entries as $importedEntry) {
            // set userId for the producer (it won't know which user is connected)
            $importedEntry['userId'] = $this->user->getId();

            if ($this->markAsRead) {
                $importedEntry = $this->setEntryAsRead($importedEntry);
            }

            $message = json_encode($importedEntry);

            // json_encode() returns false on failure (e.g. malformed UTF-8 in the imported data)
            if (false === $message) {
                $this->logger->error('Error trying to encode an entry for the queue.', [
                    'error_msg' => json_last_error_msg(),
                ]);

                ++$this->skippedEntries;

                continue;
            }

            ++$this->queuedEntries;

            $this->producer->publish($message);
        }
    }

    /**
     * Set the current imported entry as archived / read.
     * The implementation differs across imports.
     *
     * @return array
     */
    abstract protected function setEntryAsRead(array $importedEntry);

    /**
     * Dispatch the entry.saved event for each given entry.
     * Must be called after the entries have been flushed.
     *
     * @param Entry[] $savedEntries
     */
    private function dispatchSavedEvents(array $savedEntries)
    {
        foreach ($savedEntries as $savedEntry) {
            $this->eventDispatcher->dispatch(EntrySavedEvent::NAME, new EntrySavedEvent($savedEntry));
        }
    }
}