+ $entry->setTitle($path);
+ }
+
+ /**
+  * Normalise the title of fetched content so that wrong character encodings and invalid UTF-8
+  * characters do not end up in the stored title.
+  *
+  * When the content type is exactly "application/pdf", the title is first passed through
+  * convertPdfEncodingToUTF8(). In every case the (possibly converted) title is then passed
+  * through sanitizeUTF8Text() and that result is returned.
+  *
+  * @param string $title       Title delivered with the fetched content
+  * @param string $contentType Content type of the fetched content (compared strictly against "application/pdf")
+  *
+  * @return string
+  */
+ private function sanitizeContentTitle($title, $contentType)
+ {
+     $isPdf = 'application/pdf' === $contentType;
+     $normalizedTitle = $isPdf ? $this->convertPdfEncodingToUTF8($title) : $title;
+
+     return $this->sanitizeUTF8Text($normalizedTitle);
+ }
+
+ /**
+  * Titles extracted from a PDF are quite possibly not encoded in UTF-8. This method probes a fixed
+  * list of candidate encodings (UTF-8, UTF-16BE, WINDOWS-1252, in that order) and converts the title
+  * to UTF-8 from the first candidate that mb_check_encoding() accepts.
+  *
+  * If no candidate is accepted, the title is returned untouched.
+  *
+  * NOTE(review): mb_check_encoding() only tells whether the bytes are *valid* in an encoding, not
+  * whether they were written in it; a single-byte title of even length may presumably be accepted
+  * as UTF-16BE. Confirm against real-world PDF titles.
+  *
+  * @param string $title
+  *
+  * @return string (may still contain invalid UTF-8 characters when no candidate encoding matched)
+  */
+ private function convertPdfEncodingToUTF8($title)
+ {
+     // UTF-8 is probed first because its presence/absence is the easiest to detect
+     $candidateEncodings = ['UTF-8', 'UTF-16BE', 'WINDOWS-1252'];
+
+     foreach ($candidateEncodings as $candidate) {
+         if (!mb_check_encoding($title, $candidate)) {
+             continue;
+         }
+
+         return mb_convert_encoding($title, 'UTF-8', $candidate);
+     }
+
+     return $title;
+ }
+
+ /**
+  * Remove invalid UTF-8 characters from the given string.
+  *
+  * Text that is already valid UTF-8 is returned unchanged. Otherwise the invalid bytes are dropped
+  * with iconv()'s "//IGNORE" mode. iconv() returns false when the conversion fails (the behaviour
+  * of "//IGNORE" depends on the platform's iconv implementation); in that case the clean-up is
+  * redone with mbstring so that this method always honours its string return type instead of
+  * leaking false to the caller.
+  *
+  * @param string $rawText
+  *
+  * @return string
+  */
+ private function sanitizeUTF8Text($rawText)
+ {
+     if (mb_check_encoding($rawText, 'UTF-8')) {
+         return $rawText;
+     }
+
+     $cleanedText = iconv('UTF-8', 'UTF-8//IGNORE', $rawText);
+     if (false !== $cleanedText) {
+         return $cleanedText;
+     }
+
+     // iconv could not do the job: let mbstring discard the invalid sequences instead.
+     // "none" makes mbstring drop invalid input rather than replace it with "?".
+     $previousSubstitute = mb_substitute_character();
+     mb_substitute_character('none');
+
+     try {
+         return mb_convert_encoding($rawText, 'UTF-8', 'UTF-8');
+     } finally {
+         // the substitute character is a process-wide mbstring setting: always restore it
+         mb_substitute_character($previousSubstitute);
+     }
+ }
+
+ /**
+ * Stock entry with fetched or imported content.
+ * Will fall back to OpenGraph data if available.
+ *
+ * @param Entry $entry Entry to stock
+ * @param array $content Array with at least title, url & html
+ */
+ private function stockEntry(Entry $entry, array $content)
+ {
+ $this->updateOriginUrl($entry, $content['url']);
+
+ $this->setEntryDomainName($entry);
+
+ if (!empty($content['title'])) {
+ $entry->setTitle($content['title']);
+ } elseif (!empty($content['open_graph']['og_title'])) {
+ $entry->setTitle($content['open_graph']['og_title']);
+ }
+
+ if (empty($content['html'])) {
+ $content['html'] = $this->fetchingErrorMessage;
+
+ if (!empty($content['open_graph']['og_description'])) {
+ $content['html'] .= '<p><i>But we found a short description: </i></p>';
+ $content['html'] .= $content['open_graph']['og_description'];
+ }
+ }
+
+ $entry->setContent($content['html']);
+ $entry->setReadingTime(Utils::getReadingTime($content['html']));
+
+ if (!empty($content['status'])) {
+ $entry->setHttpStatus($content['status']);
+ }
+
+ if (!empty($content['authors']) && \is_array($content['authors'])) {
+ $entry->setPublishedBy($content['authors']);
+ }
+
+ if (!empty($content['all_headers']) && $this->storeArticleHeaders) {
+ $entry->setHeaders($content['all_headers']);
+ }
+
+ if (!empty($content['date'])) {
+ $this->updatePublishedAt($entry, $content['date']);
+ }
+
+ if (!empty($content['language'])) {
+ $this->updateLanguage($entry, $content['language']);
+ }
+
+ if (!empty($content['open_graph']['og_image'])) {
+ $this->updatePreviewPicture($entry, $content['open_graph']['og_image']);
+ }
+
+ // if content is an image, define it as a preview too
+ if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
+ $this->updatePreviewPicture($entry, $content['url']);
+ }
+
+ if (!empty($content['content_type'])) {
+ $entry->setMimetype($content['content_type']);