- // we assume that $text is encoded in ISO-8859-1 (and not the similar Windows-1252 or other encoding)
- $convertedText = utf8_encode($rawText);
- if (mb_check_encoding($convertedText, 'utf-8')) {
- return $convertedText;
+ /**
+ * If the title from the fetched content comes from a PDF, then it is very possible that the character encoding is not
+ * UTF-8. This method tries to identify the character encoding and convert the title to UTF-8.
+ * @param $title
+ * @return string (may contain invalid UTF-8 characters)
+ */
+ private function convertPdfEncodingToUTF8($title) {
+ // first try UTF-16 (then UTF-8) because it's easier to detect its presence/absence
+ foreach (array('UTF-16BE', 'UTF-16LE', 'UTF-8', 'WINDOWS-1252') as $encoding) {
+ if (mb_check_encoding($title, $encoding)) {
+ return mb_convert_encoding($title, 'UTF-8', $encoding);
+ }