@fivefilters via composer

author: Nicolas Lœuillet <nicolas@loeuillet.org> 2015-01-19 21:27:22 +0100
committer: Nicolas Lœuillet <nicolas@loeuillet.org> 2015-01-19 21:27:22 +0100
commit: c78c1a3f08815aab99752026ccdf1dcf63cf43c1 (patch)
tree: 99fb545cda7c0850e047aaf6d0060330236fb6e7 /inc/3rdparty/libraries/language-detect/LanguageDetect
parent: 9e7f6caf03b90076ba7b448aa7f11d40f584045f (diff)
download: wallabag-c78c1a3f08815aab99752026ccdf1dcf63cf43c1.tar.gz
wallabag-c78c1a3f08815aab99752026ccdf1dcf63cf43c1.tar.zst
wallabag-c78c1a3f08815aab99752026ccdf1dcf63cf43c1.zip
3 files changed, 0 insertions, 743 deletions
diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php
deleted file mode 100644
index 196d994f..00000000
--- a/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php
+++ /dev/null
@@ -1,57 +0,0 @@
-<?php
-class Text_LanguageDetect_Exception extends Exception
-{
-    /**
-     * Database file could not be found
-     */
-    const DB_NOT_FOUND = 10;
-    /**
-     * Database file found, but not readable
-     */
-    const DB_NOT_READABLE = 11;
-    /**
-     * Database file is empty
-     */
-    const DB_EMPTY = 12;
-    /**
-     * Database contents is not a PHP array
-     */
-    const DB_NOT_ARRAY = 13;
-    /**
-     * Magic quotes are activated
-     */
-    const MAGIC_QUOTES = 14;
-    /**
-     * Parameter of invalid type passed to method
-     */
-    const PARAM_TYPE = 20;
-    /**
-     * Character in parameter is invalid
-     */
-    const INVALID_CHAR = 21;
-    /**
-     * Language is not in the database
-     */
-    const UNKNOWN_LANGUAGE = 30;
-    /**
-     * Error during block detection
-     */
-    const BLOCK_DETECTION = 40;
-    /**
-     * Error while clustering languages
-     */
-    const NO_HIGHEST_KEY = 50;
-}
diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php
deleted file mode 100644
index 05b0590d..00000000
--- a/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php
+++ /dev/null
@@ -1,339 +0,0 @@
-<?php
-/**
- * Part of Text_LanguageDetect
- *
- * PHP version 5
- *
- * @category  Text
- * @package   Text_LanguageDetect
- * @author    Christian Weiske <cweiske@php.net>
- * @copyright 2011 Christian Weiske <cweiske@php.net>
- * @license   http://www.debian.org/misc/bsd.license BSD
- * @version   SVN: $Id$
- * @link      http://pear.php.net/package/Text_LanguageDetect/
- */
-/**
- * Provides a mapping between the languages from lang.dat and the
- * ISO 639-1 and ISO-639-2 codes.
- *
- * Note that this class contains only languages that exist in lang.dat.
- *
- * @category  Text
- * @package   Text_LanguageDetect
- * @author    Christian Weiske <cweiske@php.net>
- * @copyright 2011 Christian Weiske <cweiske@php.net>
- * @license   http://www.debian.org/misc/bsd.license BSD
- * @link      http://www.loc.gov/standards/iso639-2/php/code_list.php
- */
-class Text_LanguageDetect_ISO639
-{
-    /**
-     * Maps all language names from the language database to the
-     * ISO 639-1 2-letter language code.
-     *
-     * NULL indicates that there is no 2-letter code.
-     *
-     * @var array
-     */
-    public static $nameToCode2 = array(
-        'albanian'   => 'sq',
-        'arabic'     => 'ar',
-        'azeri'      => 'az',
-        'bengali'    => 'bn',
-        'bulgarian'  => 'bg',
-        'cebuano'    => null,
-        'croatian'   => 'hr',
-        'czech'      => 'cs',
-        'danish'     => 'da',
-        'dutch'      => 'nl',
-        'english'    => 'en',
-        'estonian'   => 'et',
-        'farsi'      => 'fa',
-        'finnish'    => 'fi',
-        'french'     => 'fr',
-        'german'     => 'de',
-        'hausa'      => 'ha',
-        'hawaiian'   => null,
-        'hindi'      => 'hi',
-        'hungarian'  => 'hu',
-        'icelandic'  => 'is',
-        'indonesian' => 'id',
-        'italian'    => 'it',
-        'kazakh'     => 'kk',
-        'kyrgyz'     => 'ky',
-        'latin'      => 'la',
-        'latvian'    => 'lv',
-        'lithuanian' => 'lt',
-        'macedonian' => 'mk',
-        'mongolian'  => 'mn',
-        'nepali'     => 'ne',
-        'norwegian'  => 'no',
-        'pashto'     => 'ps',
-        'pidgin'     => null,
-        'polish'     => 'pl',
-        'portuguese' => 'pt',
-        'romanian'   => 'ro',
-        'russian'    => 'ru',
-        'serbian'    => 'sr',
-        'slovak'     => 'sk',
-        'slovene'    => 'sl',
-        'somali'     => 'so',
-        'spanish'    => 'es',
-        'swahili'    => 'sw',
-        'swedish'    => 'sv',
-        'tagalog'    => 'tl',
-        'turkish'    => 'tr',
-        'ukrainian'  => 'uk',
-        'urdu'       => 'ur',
-        'uzbek'      => 'uz',
-        'vietnamese' => 'vi',
-        'welsh'      => 'cy',
-    );
-    /**
-     * Maps all language names from the language database to the
-     * ISO 639-2 3-letter language code.
-     *
-     * @var array
-     */
-    public static $nameToCode3 = array(
-        'albanian'   => 'sqi',
-        'arabic'     => 'ara',
-        'azeri'      => 'aze',
-        'bengali'    => 'ben',
-        'bulgarian'  => 'bul',
-        'cebuano'    => 'ceb',
-        'croatian'   => 'hrv',
-        'czech'      => 'ces',
-        'danish'     => 'dan',
-        'dutch'      => 'nld',
-        'english'    => 'eng',
-        'estonian'   => 'est',
-        'farsi'      => 'fas',
-        'finnish'    => 'fin',
-        'french'     => 'fra',
-        'german'     => 'deu',
-        'hausa'      => 'hau',
-        'hawaiian'   => 'haw',
-        'hindi'      => 'hin',
-        'hungarian'  => 'hun',
-        'icelandic'  => 'isl',
-        'indonesian' => 'ind',
-        'italian'    => 'ita',
-        'kazakh'     => 'kaz',
-        'kyrgyz'     => 'kir',
-        'latin'      => 'lat',
-        'latvian'    => 'lav',
-        'lithuanian' => 'lit',
-        'macedonian' => 'mkd',
-        'mongolian'  => 'mon',
-        'nepali'     => 'nep',
-        'norwegian'  => 'nor',
-        'pashto'     => 'pus',
-        'pidgin'     => 'crp',
-        'polish'     => 'pol',
-        'portuguese' => 'por',
-        'romanian'   => 'ron',
-        'russian'    => 'rus',
-        'serbian'    => 'srp',
-        'slovak'     => 'slk',
-        'slovene'    => 'slv',
-        'somali'     => 'som',
-        'spanish'    => 'spa',
-        'swahili'    => 'swa',
-        'swedish'    => 'swe',
-        'tagalog'    => 'tgl',
-        'turkish'    => 'tur',
-        'ukrainian'  => 'ukr',
-        'urdu'       => 'urd',
-        'uzbek'      => 'uzb',
-        'vietnamese' => 'vie',
-        'welsh'      => 'cym',
-    );
-    /**
-     * Maps ISO 639-1 2-letter language codes to the language names
-     * in the language database
-     *
-     * Not all languages have a 2 letter code, so some are missing
-     *
-     * @var array
-     */
-    public static $code2ToName = array(
-        'ar' => 'arabic',
-        'az' => 'azeri',
-        'bg' => 'bulgarian',
-        'bn' => 'bengali',
-        'cs' => 'czech',
-        'cy' => 'welsh',
-        'da' => 'danish',
-        'de' => 'german',
-        'en' => 'english',
-        'es' => 'spanish',
-        'et' => 'estonian',
-        'fa' => 'farsi',
-        'fi' => 'finnish',
-        'fr' => 'french',
-        'ha' => 'hausa',
-        'hi' => 'hindi',
-        'hr' => 'croatian',
-        'hu' => 'hungarian',
-        'id' => 'indonesian',
-        'is' => 'icelandic',
-        'it' => 'italian',
-        'kk' => 'kazakh',
-        'ky' => 'kyrgyz',
-        'la' => 'latin',
-        'lt' => 'lithuanian',
-        'lv' => 'latvian',
-        'mk' => 'macedonian',
-        'mn' => 'mongolian',
-        'ne' => 'nepali',
-        'nl' => 'dutch',
-        'no' => 'norwegian',
-        'pl' => 'polish',
-        'ps' => 'pashto',
-        'pt' => 'portuguese',
-        'ro' => 'romanian',
-        'ru' => 'russian',
-        'sk' => 'slovak',
-        'sl' => 'slovene',
-        'so' => 'somali',
-        'sq' => 'albanian',
-        'sr' => 'serbian',
-        'sv' => 'swedish',
-        'sw' => 'swahili',
-        'tl' => 'tagalog',
-        'tr' => 'turkish',
-        'uk' => 'ukrainian',
-        'ur' => 'urdu',
-        'uz' => 'uzbek',
-        'vi' => 'vietnamese',
-    );
-    /**
-     * Maps ISO 639-2 3-letter language codes to the language names
-     * in the language database.
-     *
-     * @var array
-     */
-    public static $code3ToName = array(
-        'ara' => 'arabic',
-        'aze' => 'azeri',
-        'ben' => 'bengali',
-        'bul' => 'bulgarian',
-        'ceb' => 'cebuano',
-        'ces' => 'czech',
-        'crp' => 'pidgin',
-        'cym' => 'welsh',
-        'dan' => 'danish',
-        'deu' => 'german',
-        'eng' => 'english',
-        'est' => 'estonian',
-        'fas' => 'farsi',
-        'fin' => 'finnish',
-        'fra' => 'french',
-        'hau' => 'hausa',
-        'haw' => 'hawaiian',
-        'hin' => 'hindi',
-        'hrv' => 'croatian',
-        'hun' => 'hungarian',
-        'ind' => 'indonesian',
-        'isl' => 'icelandic',
-        'ita' => 'italian',
-        'kaz' => 'kazakh',
-        'kir' => 'kyrgyz',
-        'lat' => 'latin',
-        'lav' => 'latvian',
-        'lit' => 'lithuanian',
-        'mkd' => 'macedonian',
-        'mon' => 'mongolian',
-        'nep' => 'nepali',
-        'nld' => 'dutch',
-        'nor' => 'norwegian',
-        'pol' => 'polish',
-        'por' => 'portuguese',
-        'pus' => 'pashto',
-        'rom' => 'romanian',
-        'rus' => 'russian',
-        'slk' => 'slovak',
-        'slv' => 'slovene',
-        'som' => 'somali',
-        'spa' => 'spanish',
-        'sqi' => 'albanian',
-        'srp' => 'serbian',
-        'swa' => 'swahili',
-        'swe' => 'swedish',
-        'tgl' => 'tagalog',
-        'tur' => 'turkish',
-        'ukr' => 'ukrainian',
-        'urd' => 'urdu',
-        'uzb' => 'uzbek',
-        'vie' => 'vietnamese',
-    );
-    /**
-     * Returns the 2-letter ISO 639-1 code for the given language name.
-     *
-     * @param string $lang English language name like "swedish"
-     *
-     * @return string Two-letter language code (e.g. "sv") or NULL if not found
-     */
-    public static function nameToCode2($lang)
-    {
-        $lang = strtolower($lang);
-        if (!isset(self::$nameToCode2[$lang])) {
-            return null;
-        }
-        return self::$nameToCode2[$lang];
-    }
-    /**
-     * Returns the 3-letter ISO 639-2 code for the given language name.
-     *
-     * @param string $lang English language name like "swedish"
-     *
-     * @return string Three-letter language code (e.g. "swe") or NULL if not found
-     */
-    public static function nameToCode3($lang)
-    {
-        $lang = strtolower($lang);
-        if (!isset(self::$nameToCode3[$lang])) {
-            return null;
-        }
-        return self::$nameToCode3[$lang];
-    }
-    /**
-     * Returns the language name for the given 2-letter ISO 639-1 code.
-     *
-     * @param string $code Two-letter language code (e.g. "sv")
-     *
-     * @return string English language name like "swedish"
-     */
-    public static function code2ToName($code)
-    {
-        $lang = strtolower($code);
-        if (!isset(self::$code2ToName[$code])) {
-            return null;
-        }
-        return self::$code2ToName[$code];
-    }
-    /**
-     * Returns the language name for the given 3-letter ISO 639-2 code.
-     *
-     * @param string $code Three-letter language code (e.g. "swe")
-     *
-     * @return string English language name like "swedish"
-     */
-    public static function code3ToName($code)
-    {
-        $lang = strtolower($code);
-        if (!isset(self::$code3ToName[$code])) {
-            return null;
-        }
-        return self::$code3ToName[$code];
-    }
-}
-\ No newline at end of file
diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php
deleted file mode 100644
index fb0e1e20..00000000
--- a/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php
+++ /dev/null
@@ -1,347 +0,0 @@
-<?php
-/**
- * This class represents a text sample to be parsed.
- *
- * @category    Text
- * @package     Text_LanguageDetect
- * @author      Nicholas Pisarro
- * @copyright   2006
- * @license     BSD
- * @version     CVS: $Id: Parser.php 322327 2012-01-15 17:55:59Z cweiske $
- * @link        http://pear.php.net/package/Text_LanguageDetect/
- * @link        http://langdetect.blogspot.com/
- */
-/**
- * This class represents a text sample to be parsed.
- *
- * This separates the analysis of a text sample from the primary LanguageDetect
- * class. After a new profile has been built, the data can be retrieved using
- * the accessor functions.
- *
- * This class is intended to be used by the Text_LanguageDetect class, not 
- * end-users.
- *
- * @category    Text
- * @package     Text_LanguageDetect
- * @author      Nicholas Pisarro
- * @copyright   2006
- * @license     BSD
- * @version     release: 0.3.0
- */
-class Text_LanguageDetect_Parser extends Text_LanguageDetect
-{
-    /**
-     * the piece of text being parsed
-     *
-     * @access  private
-     * @var     string
-     */
-    var $_string;
-    /**
-     * stores the trigram frequencies of the sample
-     *
-     * @access  private
-     * @var     string
-     */
-    var $_trigrams = array();
-    /**
-     * stores the trigram ranks of the sample
-     *
-     * @access  private
-     * @var     array
-     */
-    var $_trigram_ranks = array();
-    /**
-     * stores the unicode blocks of the sample
-     *
-     * @access  private
-     * @var     array
-     */
-    var $_unicode_blocks = array();
-    
-    /**
-     * Whether the parser should compile the unicode ranges
-     * 
-     * @access  private
-     * @var     bool
-     */
-    var $_compile_unicode = false;
-    /**
-     * Whether the parser should compile trigrams
-     *
-     * @access  private
-     * @var     bool
-     */
-    var $_compile_trigram = false;
-    /**
-     * Whether the trigram parser should pad the beginning of the string
-     *
-     * @access  private
-     * @var     bool
-     */
-    var $_trigram_pad_start = false;
-    /**
-     * Whether the unicode parser should skip non-alphabetical ascii chars
-     *
-     * @access  private
-     * @var     bool
-     */
-    var $_unicode_skip_symbols = true;
-    /**
-     * Constructor
-     *
-     * @access  private
-     * @param   string  $string     string to be parsed
-     */
-    function Text_LanguageDetect_Parser($string) {
-        $this->_string = $string;
-    }
-    /**
-     * Returns true if a string is suitable for parsing
-     *
-     * @param   string  $str    input string to test
-     * @return  bool            true if acceptable, false if not
-     */
-    public static function validateString($str) {
-        if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) {
-            return true;
-        } else {
-            return false;
-        }
-    }
-    /**
-     * turn on/off trigram counting
-     *
-     * @access  public
-     * @param   bool    $bool true for on, false for off
-     */
-    function prepareTrigram($bool = true)
-    {
-        $this->_compile_trigram = $bool;
-    }
-    /**
-     * turn on/off unicode block counting
-     *
-     * @access  public
-     * @param   bool    $bool true for on, false for off
-     */
-    function prepareUnicode($bool = true)
-    {
-        $this->_compile_unicode = $bool;
-    }
-    /**
-     * turn on/off padding the beginning of the sample string
-     *
-     * @access  public
-     * @param   bool    $bool true for on, false for off
-     */
-    function setPadStart($bool = true)
-    {
-        $this->_trigram_pad_start = $bool;
-    }
-    /**
-     * Should the unicode block counter skip non-alphabetical ascii chars?
-     *
-     * @access  public
-     * @param   bool    $bool true for on, false for off
-     */
-    function setUnicodeSkipSymbols($bool = true)
-    {
-        $this->_unicode_skip_symbols = $bool;
-    }
-    /**
-     * Returns the trigram ranks for the text sample
-     *
-     * @access  public
-     * @return  array    trigram ranks in the text sample
-     */
-    function &getTrigramRanks()
-    {
-        return $this->_trigram_ranks;
-    }
-    /**
-     * Return the trigram freqency table
-     *
-     * only used in testing to make sure the parser is working
-     *
-     * @access  public
-     * @return  array    trigram freqencies in the text sample
-     */
-    function &getTrigramFreqs()
-    {
-        return $this->_trigram;
-    }
-    /**
-     * returns the array of unicode blocks
-     *
-     * @access  public
-     * @return  array   unicode blocks in the text sample
-     */
-    function &getUnicodeBlocks()
-    {
-        return $this->_unicode_blocks;
-    }
-    /**
-     * Executes the parsing operation
-     * 
-     * Be sure to call the set*() functions to set options and the 
-     * prepare*() functions first to tell it what kind of data to compute
-     *
-     * Afterwards the get*() functions can be used to access the compiled
-     * information.
-     *
-     * @access public
-     */
-    function analyze()
-    {
-        $len = strlen($this->_string);
-        $byte_counter = 0;
-        // unicode startup
-        if ($this->_compile_unicode) {
-            $blocks = $this->_read_unicode_block_db();
-            $block_count = count($blocks);
-            $skipped_count = 0;
-            $unicode_chars = array();
-        }
-        // trigram startup
-        if ($this->_compile_trigram) {
-            // initialize them as blank so the parser will skip the first two
-            // (since it skips trigrams with more than  2 contiguous spaces)
-            $a = ' ';
-            $b = ' ';
-            // kludge
-            // if it finds a valid trigram to start and the start pad option is
-            // off, then set a variable that will be used to reduce this
-            // trigram after parsing has finished
-            if (!$this->_trigram_pad_start) {
-                $a = $this->_next_char($this->_string, $byte_counter, true);
-                if ($a != ' ') {
-                    $b = $this->_next_char($this->_string, $byte_counter, true);
-                    $dropone = " $a$b";
-                }
-                $byte_counter = 0;
-                $a = ' ';
-                $b = ' ';
-            }
-        }
-        while ($byte_counter < $len) {
-            $char = $this->_next_char($this->_string, $byte_counter, true);
-            // language trigram detection
-            if ($this->_compile_trigram) {
-                if (!($b == ' ' && ($a == ' ' || $char == ' '))) {
-                    if (!isset($this->_trigram[$a . $b . $char])) {
-                       $this->_trigram[$a . $b . $char] = 1;
-                    } else {
-                       $this->_trigram[$a . $b . $char]++;
-                    }
-                }
-                $a = $b;
-                $b = $char;
-            }
-            // unicode block detection
-            if ($this->_compile_unicode) {
-                if ($this->_unicode_skip_symbols
-                        && strlen($char) == 1
-                        && ($char < 'A' || $char > 'z'
-                        || ($char > 'Z' && $char < 'a'))
-                        && $char != "'") {  // does not skip the apostrophe
-                                            // since it's included in the language
-                                            // models
-                    $skipped_count++;
-                    continue;
-                }
-                // build an array of all the characters
-                if (isset($unicode_chars[$char])) {
-                    $unicode_chars[$char]++;
-                } else {
-                    $unicode_chars[$char] = 1;
-                }
-            }
-            // todo: add byte detection here
-        }
-        // unicode cleanup
-        if ($this->_compile_unicode) {
-            foreach ($unicode_chars as $utf8_char => $count) {
-                $search_result = $this->_unicode_block_name(
-                        $this->_utf8char2unicode($utf8_char), $blocks, $block_count);
-                if ($search_result != -1) {
-                    $block_name = $search_result[2];
-                } else {
-                    $block_name = '[Malformatted]';
-                }
-                if (isset($this->_unicode_blocks[$block_name])) {
-                    $this->_unicode_blocks[$block_name] += $count;
-                } else {
-                    $this->_unicode_blocks[$block_name] = $count;
-                }
-            }
-        }
-        // trigram cleanup
-        if ($this->_compile_trigram) {
-            // pad the end
-            if ($b != ' ') {
-                if (!isset($this->_trigram["$a$b "])) {
-                    $this->_trigram["$a$b "] = 1;
-                } else {
-                    $this->_trigram["$a$b "]++;
-                }
-            }
-            // perl compatibility; Language::Guess does not pad the beginning
-            // kludge
-            if (isset($dropone)) {
-                if ($this->_trigram[$dropone] == 1) {
-                    unset($this->_trigram[$dropone]);
-                } else {
-                    $this->_trigram[$dropone]--;
-                }
-            }
-            if (!empty($this->_trigram)) {
-                $this->_trigram_ranks = $this->_arr_rank($this->_trigram);
-            } else {
-                $this->_trigram_ranks = array();
-            }
-        }
-    }
-}
-/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
-\ No newline at end of file
author	Nicolas Lœuillet <nicolas@loeuillet.org>	2015-01-19 21:27:22 +0100
committer	Nicolas Lœuillet <nicolas@loeuillet.org>	2015-01-19 21:27:22 +0100
commit	c78c1a3f08815aab99752026ccdf1dcf63cf43c1 (patch)
tree	99fb545cda7c0850e047aaf6d0060330236fb6e7 /inc/3rdparty/libraries/language-detect/LanguageDetect
parent	9e7f6caf03b90076ba7b448aa7f11d40f584045f (diff)
download	wallabag-c78c1a3f08815aab99752026ccdf1dcf63cf43c1.tar.gz wallabag-c78c1a3f08815aab99752026ccdf1dcf63cf43c1.tar.zst wallabag-c78c1a3f08815aab99752026ccdf1dcf63cf43c1.zip

diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php deleted file mode 100644 index 196d994f..00000000 --- a/inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php +++ /dev/null
@@ -1,57 +0,0 @@
1	<?php
2	class Text_LanguageDetect_Exception extends Exception
3	{
4	/**
5	* Database file could not be found
6	*/
7	const DB_NOT_FOUND = 10;
8
9	/**
10	* Database file found, but not readable
11	*/
12	const DB_NOT_READABLE = 11;
13
14	/**
15	* Database file is empty
16	*/
17	const DB_EMPTY = 12;
18
19	/**
20	* Database contents is not a PHP array
21	*/
22	const DB_NOT_ARRAY = 13;
23
24	/**
25	* Magic quotes are activated
26	*/
27	const MAGIC_QUOTES = 14;
28
29
30	/**
31	* Parameter of invalid type passed to method
32	*/
33	const PARAM_TYPE = 20;
34
35	/**
36	* Character in parameter is invalid
37	*/
38	const INVALID_CHAR = 21;
39
40
41	/**
42	* Language is not in the database
43	*/
44	const UNKNOWN_LANGUAGE = 30;
45
46
47	/**
48	* Error during block detection
49	*/
50	const BLOCK_DETECTION = 40;
51
52
53	/**
54	* Error while clustering languages
55	*/
56	const NO_HIGHEST_KEY = 50;
57	}


diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php deleted file mode 100644 index 05b0590d..00000000 --- a/inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php +++ /dev/null
@@ -1,339 +0,0 @@
1	<?php
2	/**
3	* Part of Text_LanguageDetect
4	*
5	* PHP version 5
6	*
7	* @category Text
8	* @package Text_LanguageDetect
9	* @author Christian Weiske <cweiske@php.net>
10	* @copyright 2011 Christian Weiske <cweiske@php.net>
11	* @license http://www.debian.org/misc/bsd.license BSD
12	* @version SVN: $Id$
13	* @link http://pear.php.net/package/Text_LanguageDetect/
14	*/
15
16	/**
17	* Provides a mapping between the languages from lang.dat and the
18	* ISO 639-1 and ISO-639-2 codes.
19	*
20	* Note that this class contains only languages that exist in lang.dat.
21	*
22	* @category Text
23	* @package Text_LanguageDetect
24	* @author Christian Weiske <cweiske@php.net>
25	* @copyright 2011 Christian Weiske <cweiske@php.net>
26	* @license http://www.debian.org/misc/bsd.license BSD
27	* @link http://www.loc.gov/standards/iso639-2/php/code_list.php
28	*/
29	class Text_LanguageDetect_ISO639
30	{
31	/**
32	* Maps all language names from the language database to the
33	* ISO 639-1 2-letter language code.
34	*
35	* NULL indicates that there is no 2-letter code.
36	*
37	* @var array
38	*/
39	public static $nameToCode2 = array(
40	'albanian' => 'sq',
41	'arabic' => 'ar',
42	'azeri' => 'az',
43	'bengali' => 'bn',
44	'bulgarian' => 'bg',
45	'cebuano' => null,
46	'croatian' => 'hr',
47	'czech' => 'cs',
48	'danish' => 'da',
49	'dutch' => 'nl',
50	'english' => 'en',
51	'estonian' => 'et',
52	'farsi' => 'fa',
53	'finnish' => 'fi',
54	'french' => 'fr',
55	'german' => 'de',
56	'hausa' => 'ha',
57	'hawaiian' => null,
58	'hindi' => 'hi',
59	'hungarian' => 'hu',
60	'icelandic' => 'is',
61	'indonesian' => 'id',
62	'italian' => 'it',
63	'kazakh' => 'kk',
64	'kyrgyz' => 'ky',
65	'latin' => 'la',
66	'latvian' => 'lv',
67	'lithuanian' => 'lt',
68	'macedonian' => 'mk',
69	'mongolian' => 'mn',
70	'nepali' => 'ne',
71	'norwegian' => 'no',
72	'pashto' => 'ps',
73	'pidgin' => null,
74	'polish' => 'pl',
75	'portuguese' => 'pt',
76	'romanian' => 'ro',
77	'russian' => 'ru',
78	'serbian' => 'sr',
79	'slovak' => 'sk',
80	'slovene' => 'sl',
81	'somali' => 'so',
82	'spanish' => 'es',
83	'swahili' => 'sw',
84	'swedish' => 'sv',
85	'tagalog' => 'tl',
86	'turkish' => 'tr',
87	'ukrainian' => 'uk',
88	'urdu' => 'ur',
89	'uzbek' => 'uz',
90	'vietnamese' => 'vi',
91	'welsh' => 'cy',
92	);
93
94	/**
95	* Maps all language names from the language database to the
96	* ISO 639-2 3-letter language code.
97	*
98	* @var array
99	*/
100	public static $nameToCode3 = array(
101	'albanian' => 'sqi',
102	'arabic' => 'ara',
103	'azeri' => 'aze',
104	'bengali' => 'ben',
105	'bulgarian' => 'bul',
106	'cebuano' => 'ceb',
107	'croatian' => 'hrv',
108	'czech' => 'ces',
109	'danish' => 'dan',
110	'dutch' => 'nld',
111	'english' => 'eng',
112	'estonian' => 'est',
113	'farsi' => 'fas',
114	'finnish' => 'fin',
115	'french' => 'fra',
116	'german' => 'deu',
117	'hausa' => 'hau',
118	'hawaiian' => 'haw',
119	'hindi' => 'hin',
120	'hungarian' => 'hun',
121	'icelandic' => 'isl',
122	'indonesian' => 'ind',
123	'italian' => 'ita',
124	'kazakh' => 'kaz',
125	'kyrgyz' => 'kir',
126	'latin' => 'lat',
127	'latvian' => 'lav',
128	'lithuanian' => 'lit',
129	'macedonian' => 'mkd',
130	'mongolian' => 'mon',
131	'nepali' => 'nep',
132	'norwegian' => 'nor',
133	'pashto' => 'pus',
134	'pidgin' => 'crp',
135	'polish' => 'pol',
136	'portuguese' => 'por',
137	'romanian' => 'ron',
138	'russian' => 'rus',
139	'serbian' => 'srp',
140	'slovak' => 'slk',
141	'slovene' => 'slv',
142	'somali' => 'som',
143	'spanish' => 'spa',
144	'swahili' => 'swa',
145	'swedish' => 'swe',
146	'tagalog' => 'tgl',
147	'turkish' => 'tur',
148	'ukrainian' => 'ukr',
149	'urdu' => 'urd',
150	'uzbek' => 'uzb',
151	'vietnamese' => 'vie',
152	'welsh' => 'cym',
153	);
154
155	/**
156	* Maps ISO 639-1 2-letter language codes to the language names
157	* in the language database
158	*
159	* Not all languages have a 2 letter code, so some are missing
160	*
161	* @var array
162	*/
163	public static $code2ToName = array(
164	'ar' => 'arabic',
165	'az' => 'azeri',
166	'bg' => 'bulgarian',
167	'bn' => 'bengali',
168	'cs' => 'czech',
169	'cy' => 'welsh',
170	'da' => 'danish',
171	'de' => 'german',
172	'en' => 'english',
173	'es' => 'spanish',
174	'et' => 'estonian',
175	'fa' => 'farsi',
176	'fi' => 'finnish',
177	'fr' => 'french',
178	'ha' => 'hausa',
179	'hi' => 'hindi',
180	'hr' => 'croatian',
181	'hu' => 'hungarian',
182	'id' => 'indonesian',
183	'is' => 'icelandic',
184	'it' => 'italian',
185	'kk' => 'kazakh',
186	'ky' => 'kyrgyz',
187	'la' => 'latin',
188	'lt' => 'lithuanian',
189	'lv' => 'latvian',
190	'mk' => 'macedonian',
191	'mn' => 'mongolian',
192	'ne' => 'nepali',
193	'nl' => 'dutch',
194	'no' => 'norwegian',
195	'pl' => 'polish',
196	'ps' => 'pashto',
197	'pt' => 'portuguese',
198	'ro' => 'romanian',
199	'ru' => 'russian',
200	'sk' => 'slovak',
201	'sl' => 'slovene',
202	'so' => 'somali',
203	'sq' => 'albanian',
204	'sr' => 'serbian',
205	'sv' => 'swedish',
206	'sw' => 'swahili',
207	'tl' => 'tagalog',
208	'tr' => 'turkish',
209	'uk' => 'ukrainian',
210	'ur' => 'urdu',
211	'uz' => 'uzbek',
212	'vi' => 'vietnamese',
213	);
214
215	/**
216	* Maps ISO 639-2 3-letter language codes to the language names
217	* in the language database.
218	*
219	* @var array
220	*/
221	public static $code3ToName = array(
222	'ara' => 'arabic',
223	'aze' => 'azeri',
224	'ben' => 'bengali',
225	'bul' => 'bulgarian',
226	'ceb' => 'cebuano',
227	'ces' => 'czech',
228	'crp' => 'pidgin',
229	'cym' => 'welsh',
230	'dan' => 'danish',
231	'deu' => 'german',
232	'eng' => 'english',
233	'est' => 'estonian',
234	'fas' => 'farsi',
235	'fin' => 'finnish',
236	'fra' => 'french',
237	'hau' => 'hausa',
238	'haw' => 'hawaiian',
239	'hin' => 'hindi',
240	'hrv' => 'croatian',
241	'hun' => 'hungarian',
242	'ind' => 'indonesian',
243	'isl' => 'icelandic',
244	'ita' => 'italian',
245	'kaz' => 'kazakh',
246	'kir' => 'kyrgyz',
247	'lat' => 'latin',
248	'lav' => 'latvian',
249	'lit' => 'lithuanian',
250	'mkd' => 'macedonian',
251	'mon' => 'mongolian',
252	'nep' => 'nepali',
253	'nld' => 'dutch',
254	'nor' => 'norwegian',
255	'pol' => 'polish',
256	'por' => 'portuguese',
257	'pus' => 'pashto',
258	'rom' => 'romanian',
259	'rus' => 'russian',
260	'slk' => 'slovak',
261	'slv' => 'slovene',
262	'som' => 'somali',
263	'spa' => 'spanish',
264	'sqi' => 'albanian',
265	'srp' => 'serbian',
266	'swa' => 'swahili',
267	'swe' => 'swedish',
268	'tgl' => 'tagalog',
269	'tur' => 'turkish',
270	'ukr' => 'ukrainian',
271	'urd' => 'urdu',
272	'uzb' => 'uzbek',
273	'vie' => 'vietnamese',
274	);
275
276	/**
277	* Returns the 2-letter ISO 639-1 code for the given language name.
278	*
279	* @param string $lang English language name like "swedish"
280	*
281	* @return string Two-letter language code (e.g. "sv") or NULL if not found
282	*/
283	public static function nameToCode2($lang)
284	{
285	$lang = strtolower($lang);
286	if (!isset(self::$nameToCode2[$lang])) {
287	return null;
288	}
289	return self::$nameToCode2[$lang];
290	}
291
292	/**
293	* Returns the 3-letter ISO 639-2 code for the given language name.
294	*
295	* @param string $lang English language name like "swedish"
296	*
297	* @return string Three-letter language code (e.g. "swe") or NULL if not found
298	*/
299	public static function nameToCode3($lang)
300	{
301	$lang = strtolower($lang);
302	if (!isset(self::$nameToCode3[$lang])) {
303	return null;
304	}
305	return self::$nameToCode3[$lang];
306	}
307
308	/**
309	* Returns the language name for the given 2-letter ISO 639-1 code.
310	*
311	* @param string $code Two-letter language code (e.g. "sv")
312	*
313	* @return string English language name like "swedish"
314	*/
315	public static function code2ToName($code)
316	{
317	$lang = strtolower($code);
318	if (!isset(self::$code2ToName[$code])) {
319	return null;
320	}
321	return self::$code2ToName[$code];
322	}
323
324	/**
325	* Returns the language name for the given 3-letter ISO 639-2 code.
326	*
327	* @param string $code Three-letter language code (e.g. "swe")
328	*
329	* @return string English language name like "swedish"
330	*/
331	public static function code3ToName($code)
332	{
333	$lang = strtolower($code);
334	if (!isset(self::$code3ToName[$code])) {
335	return null;
336	}
337	return self::$code3ToName[$code];
338	}
339	} \ No newline at end of file


diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php b/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php deleted file mode 100644 index fb0e1e20..00000000 --- a/inc/3rdparty/libraries/language-detect/LanguageDetect/Parser.php +++ /dev/null
@@ -1,347 +0,0 @@
1	<?php
2
3	/**
4	* This class represents a text sample to be parsed.
5	*
6	* @category Text
7	* @package Text_LanguageDetect
8	* @author Nicholas Pisarro
9	* @copyright 2006
10	* @license BSD
11	* @version CVS: $Id: Parser.php 322327 2012-01-15 17:55:59Z cweiske $
12	* @link http://pear.php.net/package/Text_LanguageDetect/
13	* @link http://langdetect.blogspot.com/
14	*/
15
16	/**
17	* This class represents a text sample to be parsed.
18	*
19	* This separates the analysis of a text sample from the primary LanguageDetect
20	* class. After a new profile has been built, the data can be retrieved using
21	* the accessor functions.
22	*
23	* This class is intended to be used by the Text_LanguageDetect class, not
24	* end-users.
25	*
26	* @category Text
27	* @package Text_LanguageDetect
28	* @author Nicholas Pisarro
29	* @copyright 2006
30	* @license BSD
31	* @version release: 0.3.0
32	*/
class Text_LanguageDetect_Parser extends Text_LanguageDetect
{
    /**
     * The piece of text being parsed
     *
     * @access private
     * @var string
     */
    public $_string;

    /**
     * Stores the trigram frequencies of the sample (trigram => count)
     *
     * @access private
     * @var array
     */
    public $_trigrams = array();

    /**
     * Stores the trigram ranks of the sample
     *
     * @access private
     * @var array
     */
    public $_trigram_ranks = array();

    /**
     * Stores the unicode blocks of the sample (block name => count)
     *
     * @access private
     * @var array
     */
    public $_unicode_blocks = array();

    /**
     * Whether the parser should compile the unicode ranges
     *
     * @access private
     * @var bool
     */
    public $_compile_unicode = false;

    /**
     * Whether the parser should compile trigrams
     *
     * @access private
     * @var bool
     */
    public $_compile_trigram = false;

    /**
     * Whether the trigram parser should pad the beginning of the string
     *
     * @access private
     * @var bool
     */
    public $_trigram_pad_start = false;

    /**
     * Whether the unicode parser should skip non-alphabetical ascii chars
     *
     * @access private
     * @var bool
     */
    public $_unicode_skip_symbols = true;

    /**
     * Constructor
     *
     * Declared as __construct() because a method named after the class is
     * no longer treated as a constructor by PHP 8. The parent constructor
     * is deliberately not called, matching the previous behaviour.
     *
     * @access private
     * @param string $string string to be parsed
     */
    public function __construct($string)
    {
        $this->_string = $string;
    }

    /**
     * Returns true if a string is suitable for parsing
     *
     * A string is accepted when it is non-empty, longer than 3 bytes and
     * contains at least one non-whitespace character.
     *
     * @param string $str input string to test
     * @return bool true if acceptable, false if not
     */
    public static function validateString($str)
    {
        return !empty($str)
            && strlen($str) > 3
            && preg_match('/\S/', $str) === 1;
    }

    /**
     * Turn on/off trigram counting
     *
     * @access public
     * @param bool $bool true for on, false for off
     */
    public function prepareTrigram($bool = true)
    {
        $this->_compile_trigram = $bool;
    }

    /**
     * Turn on/off unicode block counting
     *
     * @access public
     * @param bool $bool true for on, false for off
     */
    public function prepareUnicode($bool = true)
    {
        $this->_compile_unicode = $bool;
    }

    /**
     * Turn on/off padding the beginning of the sample string
     *
     * @access public
     * @param bool $bool true for on, false for off
     */
    public function setPadStart($bool = true)
    {
        $this->_trigram_pad_start = $bool;
    }

    /**
     * Should the unicode block counter skip non-alphabetical ascii chars?
     *
     * @access public
     * @param bool $bool true for on, false for off
     */
    public function setUnicodeSkipSymbols($bool = true)
    {
        $this->_unicode_skip_symbols = $bool;
    }

    /**
     * Returns the trigram ranks for the text sample
     *
     * @access public
     * @return array trigram ranks in the text sample
     */
    public function &getTrigramRanks()
    {
        return $this->_trigram_ranks;
    }

    /**
     * Returns the trigram frequency table
     *
     * Only used in testing to make sure the parser is working.
     *
     * @access public
     * @return array trigram frequencies in the text sample
     */
    public function &getTrigramFreqs()
    {
        return $this->_trigrams;
    }

    /**
     * Returns the array of unicode blocks
     *
     * @access public
     * @return array unicode blocks in the text sample
     */
    public function &getUnicodeBlocks()
    {
        return $this->_unicode_blocks;
    }

    /**
     * Executes the parsing operation
     *
     * Be sure to call the set*() functions to set options and the
     * prepare*() functions first to tell it what kind of data to compute.
     *
     * Afterwards the get*() functions can be used to access the compiled
     * information.
     *
     * @access public
     */
    public function analyze()
    {
        $len = strlen($this->_string);
        $byte_counter = 0;

        // unicode startup
        $blocks = null;
        $block_count = 0;
        $unicode_chars = array();
        if ($this->_compile_unicode) {
            $blocks = $this->_read_unicode_block_db();
            $block_count = count($blocks);
        }

        // trigram startup
        // initialize them as blank so the parser will skip the first two
        // (since it skips trigrams with more than 2 contiguous spaces)
        $a = ' ';
        $b = ' ';
        $dropone = null;

        // kludge
        // if it finds a valid trigram to start and the start pad option is
        // off, then remember that trigram so its count can be reduced by
        // one after parsing has finished
        if ($this->_compile_trigram && !$this->_trigram_pad_start) {
            // NOTE(review): _next_char() is inherited; it presumably advances
            // $byte_counter by reference, which is why the counter is reset
            // below after peeking at the first two characters.
            $a = $this->_next_char($this->_string, $byte_counter, true);

            if ($a != ' ') {
                $b = $this->_next_char($this->_string, $byte_counter, true);
                $dropone = " $a$b";
            }

            $byte_counter = 0;
            $a = ' ';
            $b = ' ';
        }

        while ($byte_counter < $len) {
            $char = $this->_next_char($this->_string, $byte_counter, true);

            // language trigram detection
            if ($this->_compile_trigram) {
                if (!($b == ' ' && ($a == ' ' || $char == ' '))) {
                    $trigram = $a . $b . $char;
                    if (!isset($this->_trigrams[$trigram])) {
                        $this->_trigrams[$trigram] = 1;
                    } else {
                        $this->_trigrams[$trigram]++;
                    }
                }

                $a = $b;
                $b = $char;
            }

            // unicode block detection
            if ($this->_compile_unicode) {
                if ($this->_unicode_skip_symbols
                    && strlen($char) == 1
                    && ($char < 'A' || $char > 'z'
                    || ($char > 'Z' && $char < 'a'))
                    && $char != "'"
                ) {
                    // does not skip the apostrophe since it's included in
                    // the language models
                    continue;
                }

                // build an array of all the characters
                if (isset($unicode_chars[$char])) {
                    $unicode_chars[$char]++;
                } else {
                    $unicode_chars[$char] = 1;
                }
            }

            // todo: add byte detection here
        }

        // unicode cleanup
        if ($this->_compile_unicode) {
            foreach ($unicode_chars as $utf8_char => $count) {
                // PHP turns numeric string keys such as "5" into integers;
                // cast back so the converter always receives a string.
                $search_result = $this->_unicode_block_name(
                    $this->_utf8char2unicode((string) $utf8_char),
                    $blocks,
                    $block_count
                );

                if ($search_result != -1) {
                    $block_name = $search_result[2];
                } else {
                    $block_name = '[Malformatted]';
                }

                if (isset($this->_unicode_blocks[$block_name])) {
                    $this->_unicode_blocks[$block_name] += $count;
                } else {
                    $this->_unicode_blocks[$block_name] = $count;
                }
            }
        }

        // trigram cleanup
        if ($this->_compile_trigram) {
            // pad the end
            if ($b != ' ') {
                $last = "$a$b ";
                if (!isset($this->_trigrams[$last])) {
                    $this->_trigrams[$last] = 1;
                } else {
                    $this->_trigrams[$last]++;
                }
            }

            // perl compatibility; Language::Guess does not pad the beginning
            // kludge
            // The isset() guard avoids creating a bogus NULL entry when the
            // remembered start trigram was never counted (very short input).
            if ($dropone !== null && isset($this->_trigrams[$dropone])) {
                if ($this->_trigrams[$dropone] == 1) {
                    unset($this->_trigrams[$dropone]);
                } else {
                    $this->_trigrams[$dropone]--;
                }
            }

            if (!empty($this->_trigrams)) {
                $this->_trigram_ranks = $this->_arr_rank($this->_trigrams);
            } else {
                $this->_trigram_ranks = array();
            }
        }
    }
}
346
347	/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ \ No newline at end of file