diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 34 |
1 files changed, 25 insertions, 9 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 4b3e6fbb..8ba77ca9 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -31,7 +31,7 @@ class ContentProxy | |||
31 | } | 31 | } |
32 | 32 | ||
33 | /** | 33 | /** |
34 | * Fetch content using graby and hydrate given entry with results information. | 34 | * Fetch content using graby and hydrate given $entry with results information. |
35 | * In case we couldn't find content, we'll try to use Open Graph data. | 35 | * In case we couldn't find content, we'll try to use Open Graph data. |
36 | * | 36 | * |
37 | * We can also force the content, in case of an import from the v1 for example, so the function won't | 37 | * We can also force the content, in case of an import from the v1 for example, so the function won't |
@@ -39,12 +39,17 @@ class ContentProxy | |||
39 | * | 39 | * |
40 | * @param Entry $entry Entry to update | 40 | * @param Entry $entry Entry to update |
41 | * @param string $url Url to grab content for | 41 | * @param string $url Url to grab content for |
42 | * @param array $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url | 42 | * @param array $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url |
43 | * | 43 | * |
44 | * @return Entry | 44 | * @return Entry |
45 | */ | 45 | */ |
46 | public function updateEntry(Entry $entry, $url, array $content = []) | 46 | public function updateEntry(Entry $entry, $url, array $content = []) |
47 | { | 47 | { |
48 | // ensure content is a bit cleaned up | ||
49 | if (!empty($content['html'])) { | ||
50 | $content['html'] = $this->graby->cleanupHtml($content['html'], $url); | ||
51 | } | ||
52 | |||
48 | // do we have to fetch the content or the provided one is ok? | 53 | // do we have to fetch the content or the provided one is ok? |
49 | if (empty($content) || false === $this->validateContent($content)) { | 54 | if (empty($content) || false === $this->validateContent($content)) { |
50 | $fetchedContent = $this->graby->fetchContent($url); | 55 | $fetchedContent = $this->graby->fetchContent($url); |
@@ -57,7 +62,7 @@ class ContentProxy | |||
57 | } | 62 | } |
58 | 63 | ||
59 | $title = $content['title']; | 64 | $title = $content['title']; |
60 | if (!$title && isset($content['open_graph']['og_title'])) { | 65 | if (!$title && !empty($content['open_graph']['og_title'])) { |
61 | $title = $content['open_graph']['og_title']; | 66 | $title = $content['open_graph']['og_title']; |
62 | } | 67 | } |
63 | 68 | ||
@@ -65,7 +70,7 @@ class ContentProxy | |||
65 | if (false === $html) { | 70 | if (false === $html) { |
66 | $html = $this->fetchingErrorMessage; | 71 | $html = $this->fetchingErrorMessage; |
67 | 72 | ||
68 | if (isset($content['open_graph']['og_description'])) { | 73 | if (!empty($content['open_graph']['og_description'])) { |
69 | $html .= '<p><i>But we found a short description: </i></p>'; | 74 | $html .= '<p><i>But we found a short description: </i></p>'; |
70 | $html .= $content['open_graph']['og_description']; | 75 | $html .= $content['open_graph']['og_description']; |
71 | } | 76 | } |
@@ -76,8 +81,19 @@ class ContentProxy | |||
76 | $entry->setContent($html); | 81 | $entry->setContent($html); |
77 | $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); | 82 | $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); |
78 | 83 | ||
79 | if (isset($content['date']) && null !== $content['date'] && '' !== $content['date']) { | 84 | if (!empty($content['date'])) { |
80 | $entry->setPublishedAt(new \DateTime($content['date'])); | 85 | $date = $content['date']; |
86 | |||
87 | // is it a timestamp? | ||
88 | if (filter_var($date, FILTER_VALIDATE_INT) !== false) { | ||
89 | $date = '@'.$content['date']; | ||
90 | } | ||
91 | |||
92 | try { | ||
93 | $entry->setPublishedAt(new \DateTime($date)); | ||
94 | } catch (\Exception $e) { | ||
95 | $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]); | ||
96 | } | ||
81 | } | 97 | } |
82 | 98 | ||
83 | if (!empty($content['authors'])) { | 99 | if (!empty($content['authors'])) { |
@@ -97,12 +113,12 @@ class ContentProxy | |||
97 | $entry->setDomainName($domainName); | 113 | $entry->setDomainName($domainName); |
98 | } | 114 | } |
99 | 115 | ||
100 | if (isset($content['open_graph']['og_image']) && $content['open_graph']['og_image']) { | 116 | if (!empty($content['open_graph']['og_image'])) { |
101 | $entry->setPreviewPicture($content['open_graph']['og_image']); | 117 | $entry->setPreviewPicture($content['open_graph']['og_image']); |
102 | } | 118 | } |
103 | 119 | ||
104 | // if content is an image define as a preview too | 120 | // if content is an image define as a preview too |
105 | if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { | 121 | if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { |
106 | $entry->setPreviewPicture($content['url']); | 122 | $entry->setPreviewPicture($content['url']); |
107 | } | 123 | } |
108 | 124 | ||
@@ -128,6 +144,6 @@ class ContentProxy | |||
128 | */ | 144 | */ |
129 | private function validateContent(array $content) | 145 | private function validateContent(array $content) |
130 | { | 146 | { |
131 | return isset($content['title']) && isset($content['html']) && isset($content['url']) && isset($content['language']) && isset($content['content_type']); | 147 | return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); |
132 | } | 148 | } |
133 | } | 149 | } |