- * We can also force the content, in case of an import from the v1 for example, so the function won't
- * fetch the content from the website but rather use information given with the $content parameter.
- *
- * @param Entry $entry Entry to update
- * @param string $url Url to grab content for
- * @param array $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url
+ * @param Entry $entry Entry to update
+ * @param string $url Url of the content
+ * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
+ * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
+ */
+ public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
+ {
+     // Clean up any HTML handed in by the caller before judging whether it is usable.
+     if (!empty($content['html'])) {
+         $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
+     }
+
+     // Computed before looking at $disableContentUpdate so that validateContent()
+     // is still invoked for non-empty content even when fetching is disabled.
+     $providedContentUnusable = empty($content) || false === $this->validateContent($content);
+
+     if ($providedContentUnusable && false === $disableContentUpdate) {
+         $fetched = $this->graby->fetchContent($url);
+         $fetched['title'] = $this->sanitizeUTF8Text($fetched['title']);
+
+         // Imported information is only overridden when the fetched html differs from
+         // the fetching error message; with nothing imported, the fetched result is used as is.
+         if (empty($content) || $fetched['html'] !== $this->fetchingErrorMessage) {
+             $content = $fetched;
+         }
+     }
+
+     // Always keep a url, even in case of error, so the entry can be refetched in the future.
+     if (empty($content['url'])) {
+         $content['url'] = $url;
+     }
+
+     $this->stockEntry($entry, $content);
+ }
+
+ /**
+ * Remove invalid UTF-8 characters from the given string in the following steps:
+ * - return the given string unchanged if it is already valid UTF-8
+ * - try to interpret the given string as ISO-8859-1, convert it to UTF-8 and return it (if it is valid)
+ * - simply remove every invalid UTF-8 character and return the result (https://stackoverflow.com/a/1433665)
+ *
+ * @param string $rawText
+ * @return string
+ */
+ private function sanitizeUTF8Text(string $rawText)
+ {
+     // Already valid UTF-8: nothing to repair, return the input untouched.
+     if (mb_check_encoding($rawText, 'UTF-8')) {
+         return $rawText;
+     }
+
+     // We assume that $rawText is encoded in ISO-8859-1 (and not the similar Windows-1252 or another encoding).
+     // mb_convert_encoding() is the documented replacement for utf8_encode(), which is deprecated
+     // since PHP 8.2; both map every ISO-8859-1 byte to the matching Unicode code point.
+     $convertedText = mb_convert_encoding($rawText, 'UTF-8', 'ISO-8859-1');
+     if (is_string($convertedText) && mb_check_encoding($convertedText, 'UTF-8')) {
+         return $convertedText;
+     }
+
+     // Last resort: simply remove invalid UTF-8 characters, because $rawText can have a very exotic encoding.
+     // NOTE: any byte sequence is valid ISO-8859-1, so the conversion above is expected to always
+     // succeed; this branch is kept as a safety net only.
+     $strippedText = iconv('UTF-8', 'UTF-8//IGNORE', $rawText);
+
+     // iconv() returns false on failure; fall back to an empty string so a string is always returned.
+     return false === $strippedText ? '' : $strippedText;
+ }
+
+ /**
+ * Use a Symfony validator to ensure the language is well formatted.