- // Look for charset in html header.
- preg_match('#<meta .*charset=.*>#Usi', $data, $meta);
-
- // If found, extract encoding.
- if (!empty($meta[0])) {
- // Get encoding specified in header.
- preg_match('#charset="?(.*)"#si', $meta[0], $enc);
- // If charset not found, use utf-8.
- $html_charset = (!empty($enc[1])) ? strtolower($enc[1]) : 'utf-8';
- }
- else {
- $html_charset = 'utf-8';
- }
-
- // Extract title
- $title = html_extract_title($data);
- if (!empty($title)) {
- // Re-encode title in utf-8 if necessary.
- $title = ($html_charset == 'iso-8859-1') ? utf8_encode($title) : $title;
+ // Retrieve the charset declared for the fetched page (from $headers and/or $content).
+ $charset = get_charset($headers, $content);
+ // Extract title.
+ $title = html_extract_title($content);
+ // Re-encode title in utf-8 if necessary.
+ // NOTE(review): the comparison assumes get_charset() returns a lowercase name - confirm.
+ if (! empty($title) && $charset != 'utf-8') {
+ // mb_convert_encoding() takes (string, to_encoding, from_encoding):
+ // the target encoding (utf-8) comes first, the page charset is the source.
+ $title = mb_convert_encoding($title, 'utf-8', $charset);