From c78c1a3f08815aab99752026ccdf1dcf63cf43c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Mon, 19 Jan 2015 21:27:22 +0100 Subject: @fivefilters via composer --- .../libraries/content-extractor/SiteConfig.php | 343 --------------------- 1 file changed, 343 deletions(-) delete mode 100644 inc/3rdparty/libraries/content-extractor/SiteConfig.php (limited to 'inc/3rdparty/libraries/content-extractor/SiteConfig.php') diff --git a/inc/3rdparty/libraries/content-extractor/SiteConfig.php b/inc/3rdparty/libraries/content-extractor/SiteConfig.php deleted file mode 100644 index 1f6a7603..00000000 --- a/inc/3rdparty/libraries/content-extractor/SiteConfig.php +++ /dev/null @@ -1,343 +0,0 @@ -tidy)) ? $this->tidy : $this->default_tidy; - return $this->tidy; - } - - // return bool or null - public function prune($use_default=true) { - if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune; - return $this->prune; - } - - // return string or null - public function parser($use_default=true) { - if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser; - return $this->parser; - } - - // return bool or null - public function autodetect_on_failure($use_default=true) { - if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure; - return $this->autodetect_on_failure; - } - - public static function set_config_path($path, $fallback=null) { - self::$config_path = $path; - self::$config_path_fallback = $fallback; - } - - public static function add_to_cache($key, SiteConfig $config, $use_apc=true) { - $key = strtolower($key); - if (substr($key, 0, 4) == 'www.') $key = substr($key, 4); - if ($config->cache_key) $key = $config->cache_key; - self::$config_cache[$key] = $config; - if (self::$apc && $use_apc) { - self::debug("Adding site config to APC cache with key sc.$key"); - apc_add("sc.$key", $config); - } - self::debug("Cached site config with key $key"); - } - - public static function is_cached($key) { - $key = strtolower($key); - if (substr($key, 0, 4) == 'www.') $key = substr($key, 4); - if (array_key_exists($key, self::$config_cache)) { - return true; - } elseif (self::$apc && (bool)apc_fetch("sc.$key")) { - return true; - } - return false; - } - - public function append(SiteConfig $newconfig) { - // check for commands where we accept multiple statements (no test_url) - foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header') as $var) { - // append array elements for this config variable from $newconfig to this config - //$this->$var = $this->$var + $newconfig->$var; - $this->$var = array_unique(array_merge($this->$var, $newconfig->$var)); - } - // check for single statement commands - // we do not overwrite existing non null values - foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) { - if ($this->$var === null) $this->$var = $newconfig->$var; - } - // treat find_string and replace_string separately (don't apply array_unique) (thanks fabrizio!) - foreach (array('find_string', 'replace_string') as $var) { - // append array elements for this config variable from $newconfig to this config - //$this->$var = $this->$var + $newconfig->$var; - $this->$var = array_merge($this->$var, $newconfig->$var); - } - } - - // returns SiteConfig instance if an appropriate one is found, false otherwise - // if $exact_host_match is true, we will not look for wildcard config matches - // by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists - public static function build($host, $exact_host_match=false) { - $host = strtolower($host); - if (substr($host, 0, 4) == 'www.') $host = substr($host, 4); - if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false; - // check for site configuration - $try = array($host); - // should we look for wildcard matches - if (!$exact_host_match) { - $split = explode('.', $host); - if (count($split) > 1) { - array_shift($split); - $try[] = '.'.implode('.', $split); - } - } - - // look for site config file in primary folder - self::debug(". looking for site config for $host in primary folder"); - foreach ($try as $h) { - if (array_key_exists($h, self::$config_cache)) { - self::debug("... site config for $h already loaded in this request"); - return self::$config_cache[$h]; - } elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) { - self::debug("... site config for $h in APC cache"); - return $sconfig; - } elseif (file_exists(self::$config_path."/$h.txt")) { - self::debug("... found site config ($h.txt)"); - $file_primary = self::$config_path."/$h.txt"; - $matched_name = $h; - break; - } - } - - // if we found site config, process it - if (isset($file_primary)) { - $config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); - if (!$config_lines || !is_array($config_lines)) return false; - $config = self::build_from_array($config_lines); - // if APC caching is available and enabled, mark this for cache - //$config->cache_in_apc = true; - $config->cache_key = $matched_name; - - // if autodetec on failure is off (on by default) we do not need to look - // in secondary folder - if (!$config->autodetect_on_failure()) { - self::debug('... autodetect on failure is disabled (no other site config files will be loaded)'); - return $config; - } - } - - // look for site config file in secondary folder - if (isset(self::$config_path_fallback)) { - self::debug(". looking for site config for $host in secondary folder"); - foreach ($try as $h) { - if (file_exists(self::$config_path_fallback."/$h.txt")) { - self::debug("... found site config in secondary folder ($h.txt)"); - $file_secondary = self::$config_path_fallback."/$h.txt"; - $matched_name = $h; - break; - } - } - if (!isset($file_secondary)) { - self::debug("... no site config match in secondary folder"); - } - } - - // return false if no config file found - if (!isset($file_primary) && !isset($file_secondary)) { - self::debug("... no site config match for $host"); - return false; - } - - // return primary config if secondary not found - if (!isset($file_secondary) && isset($config)) { - return $config; - } - - // process secondary config file - $config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); - if (!$config_lines || !is_array($config_lines)) { - // failed to process secondary - if (isset($config)) { - // return primary config - return $config; - } else { - return false; - } - } - - // merge with primary and return - if (isset($config)) { - self::debug('. merging config files'); - $config->append(self::build_from_array($config_lines)); - return $config; - } else { - // return just secondary - $config = self::build_from_array($config_lines); - // if APC caching is available and enabled, mark this for cache - //$config->cache_in_apc = true; - $config->cache_key = $matched_name; - return $config; - } - } - - public static function build_from_array(array $lines) { - $config = new SiteConfig(); - foreach ($lines as $line) { - $line = trim($line); - - // skip comments, empty lines - if ($line == '' || $line[0] == '#') continue; - - // get command - $command = explode(':', $line, 2); - // if there's no colon ':', skip this line - if (count($command) != 2) continue; - $val = trim($command[1]); - $command = trim($command[0]); - if ($command == '' || $val == '') continue; - - // check for commands where we accept multiple statements - if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) { - array_push($config->$command, $val); - // check for single statement commands that evaluate to true or false - } elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) { - $config->$command = ($val == 'yes'); - // check for single statement commands stored as strings - } elseif (in_array($command, array('parser'))) { - $config->$command = $val; - // check for replace_string(find): replace - } elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) { - if (in_array($match[1], array('replace_string'))) { - $command = $match[1]; - array_push($config->find_string, $match[2]); - array_push($config->$command, $val); - } - } - } - return $config; - } -} \ No newline at end of file -- cgit v1.2.3