src/Wallabag/CoreBundle/Helper/ContentProxy.php

   1 <?php
   2
   3 namespace Wallabag\CoreBundle\Helper;
   4
   5 use Graby\Graby;
   6 use Psr\Log\LoggerInterface as Logger;
   7 use Wallabag\CoreBundle\Entity\Entry;
   8 use Wallabag\CoreBundle\Entity\Tag;
   9 use Wallabag\CoreBundle\Tools\Utils;
  10 use Wallabag\CoreBundle\Repository\TagRepository;
  11 use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
  12
  13 /**
  14  * This kind of proxy class take care of getting the content from an url
  15  * and update the entry with what it found.
  16  */
  17 class ContentProxy
  18 {
  19     protected $graby;
  20     protected $tagger;
  21     protected $logger;
  22     protected $tagRepository;
  23     protected $mimeGuesser;
  24
  25     public function __construct(Graby $graby, RuleBasedTagger $tagger, TagRepository $tagRepository, Logger $logger)
  26     {
  27         $this->graby = $graby;
  28         $this->tagger = $tagger;
  29         $this->logger = $logger;
  30         $this->tagRepository = $tagRepository;
  31         $this->mimeGuesser = new MimeTypeExtensionGuesser();
  32     }
  33
  34     /**
  35      * Fetch content using graby and hydrate given entry with results information.
  36      * In case we couldn't find content, we'll try to use Open Graph data.
  37      *
  38      * We can also force the content, in case of an import from the v1 for example, so the function won't
  39      * fetch the content from the website but rather use information given with the $content parameter.
  40      *
  41      * @param Entry  $entry   Entry to update
  42      * @param string $url     Url to grab content for
  43      * @param array  $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url
  44      *
  45      * @return Entry
  46      */
  47     public function updateEntry(Entry $entry, $url, array $content = [])
  48     {
  49         // do we have to fetch the content or the provided one is ok?
  50         if (empty($content) || false === $this->validateContent($content)) {
  51             $content = $this->graby->fetchContent($url);
  52         }
  53
  54         $title = $content['title'];
  55         if (!$title && isset($content['open_graph']['og_title'])) {
  56             $title = $content['open_graph']['og_title'];
  57         }
  58
  59         $html = $content['html'];
  60         if (false === $html) {
  61             $html = '<p>Unable to retrieve readable content.</p>';
  62
  63             if (isset($content['open_graph']['og_description'])) {
  64                 $html .= '<p><i>But we found a short description: </i></p>';
  65                 $html .= $content['open_graph']['og_description'];
  66             }
  67         }
  68
  69         $entry->setUrl($content['url'] ?: $url);
  70         $entry->setTitle($title);
  71         $entry->setContent($html);
  72         $entry->setLanguage($content['language']);
  73         $entry->setMimetype($content['content_type']);
  74         $entry->setReadingTime(Utils::getReadingTime($html));
  75
  76         $domainName = parse_url($entry->getUrl(), PHP_URL_HOST);
  77         if (false !== $domainName) {
  78             $entry->setDomainName($domainName);
  79         }
  80
  81         if (isset($content['open_graph']['og_image'])) {
  82             $entry->setPreviewPicture($content['open_graph']['og_image']);
  83         }
  84
  85         // if content is an image define as a preview too
  86         if (in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
  87             $entry->setPreviewPicture($content['url']);
  88         }
  89
  90         try {
  91             $this->tagger->tag($entry);
  92         } catch (\Exception $e) {
  93             $this->logger->error('Error while trying to automatically tag an entry.', [
  94                 'entry_url' => $url,
  95                 'error_msg' => $e->getMessage(),
  96             ]);
  97         }
  98
  99         return $entry;
 100     }
 101
 102     /**
 103      * Assign some tags to an entry.
 104      *
 105      * @param Entry        $entry
 106      * @param array|string $tags          An array of tag or a string coma separated of tag
 107      * @param array        $entitiesReady Entities from the EntityManager which are persisted but not yet flushed
 108      *                                    It is mostly to fix duplicate tag on import @see http://stackoverflow.com/a/7879164/569101
 109      */
 110     public function assignTagsToEntry(Entry $entry, $tags, array $entitiesReady = [])
 111     {
 112         if (!is_array($tags)) {
 113             $tags = explode(',', $tags);
 114         }
 115
 116         // keeps only Tag entity from the "not yet flushed entities"
 117         $tagsNotYetFlushed = [];
 118         foreach ($entitiesReady as $entity) {
 119             if ($entity instanceof Tag) {
 120                 $tagsNotYetFlushed[$entity->getLabel()] = $entity;
 121             }
 122         }
 123
 124         foreach ($tags as $label) {
 125             $label = trim($label);
 126
 127             // avoid empty tag
 128             if (0 === strlen($label)) {
 129                 continue;
 130             }
 131
 132             if (isset($tagsNotYetFlushed[$label])) {
 133                 $tagEntity = $tagsNotYetFlushed[$label];
 134             } else {
 135                 $tagEntity = $this->tagRepository->findOneByLabel($label);
 136
 137                 if (is_null($tagEntity)) {
 138                     $tagEntity = new Tag();
 139                     $tagEntity->setLabel($label);
 140                 }
 141             }
 142
 143             // only add the tag on the entry if the relation doesn't exist
 144             if (false === $entry->getTags()->contains($tagEntity)) {
 145                 $entry->addTag($tagEntity);
 146             }
 147         }
 148     }
 149
 150     /**
 151      * Validate that the given content as enough value to be used
 152      * instead of fetch the content from the url.
 153      *
 154      * @param array $content
 155      *
 156      * @return bool true if valid otherwise false
 157      */
 158     private function validateContent(array $content)
 159     {
 160         return isset($content['title']) && isset($content['html']) && isset($content['url']) && isset($content['language']) && isset($content['content_type']);
 161     }
 162 }