use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Tools\Utils;
use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
+use Symfony\Component\Config\Definition\Exception\Exception;
/**
* This kind of proxy class take care of getting the content from an url
}
/**
- * Fetch content using graby and hydrate given $entry with results information.
- * In case we couldn't find content, we'll try to use Open Graph data.
- *
- * We can also force the content, in case of an import from the v1 for example, so the function won't
- * fetch the content from the website but rather use information given with the $content parameter.
+ * Update existing entry by fetching from URL using Graby.
*
* @param Entry $entry Entry to update
* @param string $url Url to grab content for
- * @param array $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url
*/
- public function updateEntry(Entry $entry, $url, array $content = [])
+ public function updateEntry(Entry $entry, $url)
{
- // ensure content is a bit cleaned up
- if (!empty($content['html'])) {
- $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
- }
+ $content = $this->graby->fetchContent($url);
+
+ $this->stockEntry($entry, $content);
+ }
+
+ /**
+ * Import entry using either fetched or provided content.
+ *
+ * @param Entry $entry Entry to update
+ * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
+ * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
+ */
+ public function importEntry(Entry $entry, array $content, $disableContentUpdate = false)
+ {
+ // throws when title, url or html is missing from the imported content
+ $this->validateContent($content);
- // do we have to fetch the content or the provided one is ok?
- if (empty($content) || false === $this->validateContent($content)) {
- $fetchedContent = $this->graby->fetchContent($url);
+ // loose check on purpose: an unset (null) flag must behave like false
+ if (!$disableContentUpdate) {
+ // stays null when fetching throws, so the imported content is kept below
+ $fetchedContent = null;
+
+ try {
+ $fetchedContent = $this->graby->fetchContent($content['url']);
+ } catch (\Exception $e) {
+ $this->logger->error('Error while trying to fetch content from URL.', [
+ 'entry_url' => $content['url'],
+ 'error_msg' => $e->getMessage(),
+ ]);
+ }
// when content is imported, we have information in $content
// in case fetching content goes bad, we'll keep the imported information instead of overriding them
- if (empty($content) || $fetchedContent['html'] !== $this->fetchingErrorMessage) {
+ if (null !== $fetchedContent && $fetchedContent['html'] !== $this->fetchingErrorMessage) {
$content = $fetchedContent;
}
}
+ $this->stockEntry($entry, $content);
+ }
+
+ /**
+ * Stock entry with fetched or imported content.
+ * Will fall back to OpenGraph data if available.
+ *
+ * @param Entry $entry Entry to stock
+ * @param array $content Array with at least title and URL
+ */
+ private function stockEntry(Entry $entry, array $content)
+ {
$title = $content['title'];
if (!$title && !empty($content['open_graph']['og_title'])) {
$title = $content['open_graph']['og_title'];
}
}
- $entry->setUrl($content['url'] ?: $url);
+ $entry->setUrl($content['url']);
$entry->setTitle($title);
$entry->setContent($html);
$entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
$this->tagger->tag($entry);
} catch (\Exception $e) {
$this->logger->error('Error while trying to automatically tag an entry.', [
- 'entry_url' => $url,
+ 'entry_url' => $content['url'],
'error_msg' => $e->getMessage(),
]);
}
}
/**
- * Validate that the given content as enough value to be used
- * instead of fetch the content from the url.
+ * Validate that the given content has at least a title, an html and a url.
*
* @param array $content
- *
- * @return bool true if valid otherwise false
*/
private function validateContent(array $content)
{
- return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
+ // every required key must be present and non-empty, otherwise the import is rejected
+ if (empty($content['title'])) {
+ throw new Exception('Missing title from imported entry!');
+ }
+
+ if (empty($content['url'])) {
+ throw new Exception('Missing URL from imported entry!');
+ }
+
+ if (empty($content['html'])) {
+ throw new Exception('Missing html from imported entry!');
+ }
}
}
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Config\Definition\Exception\Exception;
use Symfony\Component\Console\Input\InputArgument;
+use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
->addArgument('filepath', InputArgument::REQUIRED, 'Path to the JSON file')
->addOption('importer', null, InputArgument::OPTIONAL, 'The importer to use: v1, v2, instapaper, pinboard, readability, firefox or chrome', 'v1')
->addOption('markAsRead', null, InputArgument::OPTIONAL, 'Mark all entries as read', false)
- ->addOption('useUserId', null, InputArgument::OPTIONAL, 'Use user id instead of username to find account', false)
+ ->addOption('disableContentUpdate', null, InputOption::VALUE_NONE, 'Disable fetching updated content from URL')
;
}
}
$import->setMarkAsRead($input->getOption('markAsRead'));
+ $import->setDisableContentUpdate($input->getOption('disableContentUpdate'));
$import->setUser($user);
$res = $import
protected $producer;
protected $user;
protected $markAsRead;
+ protected $disableContentUpdate = false;
protected $skippedEntries = 0;
protected $importedEntries = 0;
protected $queuedEntries = 0;
return $this->markAsRead;
}
+ /**
+ * Set whether fetching updated content for articles should be skipped.
+ *
+ * @param bool $disableContentUpdate
+ */
+ public function setDisableContentUpdate($disableContentUpdate)
+ {
+ $this->disableContentUpdate = $disableContentUpdate;
+
+ return $this;
+ }
+
+ /**
+ * Get whether fetching updated content for articles should be skipped.
+ */
+ public function getDisableContentUpdate()
+ {
+ return $this->disableContentUpdate;
+ }
+
+
/**
* Fetch content from the ContentProxy (using graby).
* If it fails return the given entry to be saved in all case (to avoid user to loose the content).
protected function fetchContent(Entry $entry, $url, array $content = [])
{
try {
- $this->contentProxy->updateEntry($entry, $url, $content);
+ $this->contentProxy->importEntry($entry, $content, $this->disableContentUpdate);
} catch (\Exception $e) {
- return $entry;
+ // log the $url argument: $content defaults to [] and may not carry a 'url' key
+ $this->logger->error('Error trying to import an entry.', [
+ 'entry_url' => $url,
+ 'error_msg' => $e->getMessage(),
+ ]);
}
}
$proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
- $proxy->updateEntry(
+ $proxy->importEntry(
$entry,
- 'http://0.0.0.0',
[
'html' => str_repeat('this is my content', 325),
'title' => 'this is my title',
$entry = new Entry(new User());
- $proxy->updateEntry(
+ $proxy->importEntry(
$entry,
- 'http://0.0.0.0',
[
'html' => str_repeat('this is my content', 325),
'title' => 'this is my title',
$proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
$entry = new Entry(new User());
- $proxy->updateEntry($entry, 'http://0.0.0.0', [
+ $content = [
'html' => str_repeat('this is my content', 325),
'title' => 'this is my title',
'url' => 'http://1.1.1.1',
'content_type' => 'text/html',
'language' => 'fr',
- ]);
+ ];
+ $proxy->importEntry($entry, $content, true);
$this->assertCount(0, $entry->getTags());
}
$this->contentProxy
->expects($this->exactly(1))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn($entry);
$res = $chromeImport->import();
$this->contentProxy
->expects($this->exactly(1))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn(new Entry($this->user));
// check that every entry persisted are archived
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
->disableOriginalConstructor()
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$factory = new RedisMockFactory();
$redisMock = $factory->getAdapter('Predis\Client', true);
$this->contentProxy
->expects($this->exactly(2))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn($entry);
$res = $firefoxImport->import();
$this->contentProxy
->expects($this->exactly(1))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn(new Entry($this->user));
// check that every entry persisted are archived
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
->disableOriginalConstructor()
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$factory = new RedisMockFactory();
$redisMock = $factory->getAdapter('Predis\Client', true);
$this->contentProxy
->expects($this->exactly(4))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn($entry);
$res = $instapaperImport->import();
$this->contentProxy
->expects($this->once())
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn(new Entry($this->user));
// check that every entry persisted are archived
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
->disableOriginalConstructor()
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$factory = new RedisMockFactory();
$redisMock = $factory->getAdapter('Predis\Client', true);
$this->contentProxy
->expects($this->once())
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn($entry);
$pocketImport->setClient($client);
$this->contentProxy
->expects($this->exactly(2))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn($entry);
$pocketImport->setClient($client);
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
->disableOriginalConstructor()
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$factory = new RedisMockFactory();
$redisMock = $factory->getAdapter('Predis\Client', true);
$this->contentProxy
->expects($this->once())
- ->method('updateEntry')
+ ->method('importEntry')
->will($this->throwException(new \Exception()));
$pocketImport->setClient($client);
$this->contentProxy
->expects($this->exactly(3))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn($entry);
$res = $readabilityImport->import();
$this->contentProxy
->expects($this->exactly(1))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn(new Entry($this->user));
// check that every entry persisted are archived
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
->disableOriginalConstructor()
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$factory = new RedisMockFactory();
$redisMock = $factory->getAdapter('Predis\Client', true);
$this->contentProxy
->expects($this->exactly(1))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn($entry);
$res = $wallabagV1Import->import();
$this->contentProxy
->expects($this->exactly(3))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn(new Entry($this->user));
// check that every entry persisted are archived
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
->disableOriginalConstructor()
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$factory = new RedisMockFactory();
$redisMock = $factory->getAdapter('Predis\Client', true);
$this->contentProxy
->expects($this->exactly(2))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn(new Entry($this->user));
$res = $wallabagV2Import->import();
$this->contentProxy
->expects($this->exactly(2))
- ->method('updateEntry')
+ ->method('importEntry')
->willReturn(new Entry($this->user));
// check that every entry persisted are archived
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
->disableOriginalConstructor()
$this->contentProxy
->expects($this->never())
- ->method('updateEntry');
+ ->method('importEntry');
$factory = new RedisMockFactory();
$redisMock = $factory->getAdapter('Predis\Client', true);
$this->contentProxy
->expects($this->exactly(2))
- ->method('updateEntry')
+ ->method('importEntry')
->will($this->throwException(new \Exception()));
$res = $wallabagV2Import->import();