--- /dev/null
+<?php
+
+/**
+ * Extract the title from an HTML document.
+ *
+ * Only the first <title> element is used; the tag may carry attributes
+ * (e.g. <title lang="en">) and is matched case-insensitively.
+ * Line feeds inside the title are replaced by spaces and the result is trimmed.
+ *
+ * @param string $html HTML content where to look for a title.
+ *
+ * @return bool|string Extracted title if found, false otherwise.
+ */
+function html_extract_title($html)
+{
+    // Lazy capture: stop at the FIRST closing tag, so later <title> elements
+    // (inline SVG, for instance) cannot leak into the result.
+    // `\b[^>]*` accepts attributes on the opening tag without matching
+    // unrelated elements such as <titlebar>.
+    if (preg_match('!<title\b[^>]*>(.*?)</title>!is', $html, $matches)) {
+        return trim(str_replace("\n", ' ', $matches[1]));
+    }
+    return false;
+}
+
+/**
+ * Work out which charset a downloaded page uses.
+ *
+ * Sources are consulted in order and the first truthy answer wins:
+ *   1. the HTTP headers (Content-Type),
+ *   2. the HTML markup (<meta charset> tag),
+ *   3. the supplied fallback (UTF-8 unless overridden).
+ *
+ * @param array  $headers        HTTP headers array.
+ * @param string $htmlContent    HTML content where to look for charset.
+ * @param string $defaultCharset Charset returned when neither source yields one.
+ *
+ * @return string Determined charset.
+ */
+function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8')
+{
+    $fromHeaders = headers_extract_charset($headers);
+    if ($fromHeaders) {
+        return $fromHeaders;
+    }
+
+    // The markup is only inspected when the headers gave nothing.
+    $fromMarkup = html_extract_charset($htmlContent);
+
+    return $fromMarkup ? $fromMarkup : $defaultCharset;
+}
+
+/**
+ * Extract charset from HTTP headers if it's defined.
+ *
+ * Looks at the 'Content-Type' entry only. The `charset` parameter name is
+ * matched case-insensitively and the value may be bare or wrapped in single
+ * or double quotes; the quotes are never part of the returned value.
+ *
+ * @param array $headers HTTP headers array.
+ *
+ * @return bool|string Charset string if found (lowercase), false otherwise.
+ */
+function headers_extract_charset($headers)
+{
+    if (empty($headers['Content-Type'])) {
+        return false;
+    }
+
+    $contentType = $headers['Content-Type'];
+
+    // Some header parsers (e.g. get_headers() in associative mode) yield a list
+    // when the header is repeated, such as across redirects; use the last value.
+    if (is_array($contentType)) {
+        $contentType = end($contentType);
+    }
+    if (! is_string($contentType)) {
+        return false;
+    }
+
+    // Quotes, semicolons and whitespace terminate the value, so
+    // `charset="UTF-8"; foo=bar` yields `UTF-8` and not `UTF-8"`.
+    if (preg_match('/charset=["\']?([^;"\'\s]+)/i', $contentType, $match)) {
+        return strtolower($match[1]);
+    }
+
+    return false;
+}
+
+/**
+ * Extract charset from HTML content (<meta> tag).
+ *
+ * Handles both the short form (<meta charset="...">) and the http-equiv form
+ * (<meta http-equiv="Content-Type" content="text/html; charset=...">).
+ * The value may be unquoted, single-quoted or double-quoted, and the tag may
+ * carry further attributes after the charset.
+ *
+ * @param string $html HTML content where to look for charset.
+ *
+ * @return bool|string Charset string if found (lowercase), false otherwise.
+ */
+function html_extract_charset($html)
+{
+    // `[^>]*?` keeps the search inside a single <meta ...> tag, so a stray
+    // "charset=" later in the document cannot be picked up by mistake.
+    // Quotes, whitespace, ';', '/' and '>' all terminate the captured value.
+    if (preg_match('#<meta\b[^>]*?\bcharset\s*=\s*["\']?\s*([^"\'\s;/>]+)#i', $html, $enc)) {
+        return strtolower($enc[1]);
+    }
+
+    return false;
+}