From ec3972361d95f6f5956df77f7a76105b5ae6af72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 25 Aug 2013 20:10:23 +0200 Subject: poche now uses Full Text RSS to fetch content --- inc/3rdparty/content-extractor/SiteConfig.php | 184 ++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 inc/3rdparty/content-extractor/SiteConfig.php (limited to 'inc/3rdparty/content-extractor/SiteConfig.php') diff --git a/inc/3rdparty/content-extractor/SiteConfig.php b/inc/3rdparty/content-extractor/SiteConfig.php new file mode 100644 index 00000000..089e10c6 --- /dev/null +++ b/inc/3rdparty/content-extractor/SiteConfig.php @@ -0,0 +1,184 @@ + 200) || !preg_match(self::HOSTNAME_REGEX, $host)) return false; + // check for site configuration + $try = array($host); + $split = explode('.', $host); + if (count($split) > 1) { + array_shift($split); + $try[] = '.'.implode('.', $split); + } + foreach ($try as $h) { + if (array_key_exists($h, self::$config_cache)) { + self::debug("... cached ($h)"); + return self::$config_cache[$h]; + } elseif (file_exists(self::$config_path."/$h.txt")) { + self::debug("... from file ($h)"); + $file = self::$config_path."/$h.txt"; + break; + } + } + if (!isset($file)) { + if (isset(self::$config_path_fallback)) { + self::debug("... trying fallback ($host)"); + foreach ($try as $h) { + if (file_exists(self::$config_path_fallback."/$h.txt")) { + self::debug("... from fallback file ($h)"); + $file = self::$config_path_fallback."/$h.txt"; + break; + } + } + if (!isset($file)) { + self::debug("... no match in fallback directory"); + return false; + } + } else { + self::debug("... no match ($host)"); + return false; + } + } + $config_file = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + if (!$config_file || !is_array($config_file)) return false; + $config = new SiteConfig(); + foreach ($config_file as $line) { + $line = trim($line); + + // skip comments, empty lines + if ($line == '' || $line[0] == '#') continue; + + // get command + $command = explode(':', $line, 2); + // if there's no colon ':', skip this line + if (count($command) != 2) continue; + $val = trim($command[1]); + $command = trim($command[0]); + if ($command == '' || $val == '') continue; + + // check for commands where we accept multiple statements + if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'http_header'))) { + array_push($config->$command, $val); + // check for single statement commands that evaluate to true or false + } elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) { + $config->$command = ($val == 'yes'); + // check for single statement commands stored as strings + } elseif (in_array($command, array('test_url', 'parser'))) { + $config->$command = $val; + } elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) { + if (in_array($match[1], array('replace_string'))) { + $command = $match[1]; + array_push($config->$command, array($match[2], $val)); + } + } + } + return $config; + } +} +?> \ No newline at end of file -- cgit v1.2.3