From d18ff7d9565f982bc15c5930123992d44614e1e2 Mon Sep 17 00:00:00 2001 From: Maryana Rozhankivska Date: Fri, 23 May 2014 19:25:48 +0300 Subject: two small unimportant forgotten changes to 3.2 version of full-text-rss, issue #694 --- inc/3rdparty/libraries/language-detect/Parser.php | 354 ---------------------- inc/3rdparty/site_config/custom/index.php | 3 + 2 files changed, 3 insertions(+), 354 deletions(-) delete mode 100644 inc/3rdparty/libraries/language-detect/Parser.php create mode 100644 inc/3rdparty/site_config/custom/index.php diff --git a/inc/3rdparty/libraries/language-detect/Parser.php b/inc/3rdparty/libraries/language-detect/Parser.php deleted file mode 100644 index 7f15fa98..00000000 --- a/inc/3rdparty/libraries/language-detect/Parser.php +++ /dev/null @@ -1,354 +0,0 @@ -_db_filename = $db; - if (isset($unicode_db)) $this->_unicode_db_filename = $unicode_db; - $this->_string = $string; - } - - /** - * Returns true if a string is suitable for parsing - * - * @static - * @access public - * @param string $str input string to test - * @return bool true if acceptable, false if not - */ - function validateString($str) { - if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) { - return true; - } else { - return false; - } - } - - /** - * turn on/off trigram counting - * - * @access public - * @param bool $bool true for on, false for off - */ - function prepareTrigram($bool = true) - { - $this->_compile_trigram = $bool; - } - - /** - * turn on/off unicode block counting - * - * @access public - * @param bool $bool true for on, false for off - */ - function prepareUnicode($bool = true) - { - $this->_compile_unicode = $bool; - } - - /** - * turn on/off padding the beginning of the sample string - * - * @access public - * @param bool $bool true for on, false for off - */ - function setPadStart($bool = true) - { - $this->_trigram_pad_start = $bool; - } - - /** - * Should the unicode block counter skip non-alphabetical ascii chars? - * - * @access public - * @param bool $bool true for on, false for off - */ - function setUnicodeSkipSymbols($bool = true) - { - $this->_unicode_skip_symbols = $bool; - } - - /** - * Returns the trigram ranks for the text sample - * - * @access public - * @return array trigram ranks in the text sample - */ - function &getTrigramRanks() - { - return $this->_trigram_ranks; - } - - /** - * Return the trigram freqency table - * - * only used in testing to make sure the parser is working - * - * @access public - * @return array trigram freqencies in the text sample - */ - function &getTrigramFreqs() - { - return $this->_trigram; - } - - /** - * returns the array of unicode blocks - * - * @access public - * @return array unicode blocks in the text sample - */ - function &getUnicodeBlocks() - { - return $this->_unicode_blocks; - } - - /** - * Executes the parsing operation - * - * Be sure to call the set*() functions to set options and the - * prepare*() functions first to tell it what kind of data to compute - * - * Afterwards the get*() functions can be used to access the compiled - * information. - * - * @access public - */ - function analyze() - { - $len = strlen($this->_string); - $byte_counter = 0; - - - // unicode startup - if ($this->_compile_unicode) { - $blocks =& $this->_read_unicode_block_db(); - - $block_count = count($blocks); - - $skipped_count = 0; - $unicode_chars = array(); - } - - // trigram startup - if ($this->_compile_trigram) { - // initialize them as blank so the parser will skip the first two - // (since it skips trigrams with more than 2 contiguous spaces) - $a = ' '; - $b = ' '; - - // kludge - // if it finds a valid trigram to start and the start pad option is - // off, then set a variable that will be used to reduce this - // trigram after parsing has finished - if (!$this->_trigram_pad_start) { - $a = $this->_next_char($this->_string, $byte_counter, true); - - if ($a != ' ') { - $b = $this->_next_char($this->_string, $byte_counter, true); - $dropone = " $a$b"; - } - - $byte_counter = 0; - $a = ' '; - $b = ' '; - } - } - - while ($byte_counter < $len) { - $char = $this->_next_char($this->_string, $byte_counter, true); - - - // language trigram detection - if ($this->_compile_trigram) { - if (!($b == ' ' && ($a == ' ' || $char == ' '))) { - if (!isset($this->_trigram[$a . $b . $char])) { - $this->_trigram[$a . $b . $char] = 1; - } else { - $this->_trigram[$a . $b . $char]++; - } - } - - $a = $b; - $b = $char; - } - - // unicode block detection - if ($this->_compile_unicode) { - if ($this->_unicode_skip_symbols - && strlen($char) == 1 - && ($char < 'A' || $char > 'z' - || ($char > 'Z' && $char < 'a')) - && $char != "'") { // does not skip the apostrophe - // since it's included in the language - // models - - $skipped_count++; - continue; - } - - // build an array of all the characters - if (isset($unicode_chars[$char])) { - $unicode_chars[$char]++; - } else { - $unicode_chars[$char] = 1; - } - } - - // todo: add byte detection here - } - - // unicode cleanup - if ($this->_compile_unicode) { - foreach ($unicode_chars as $utf8_char => $count) { - $search_result = $this->_unicode_block_name( - $this->_utf8char2unicode($utf8_char), $blocks, $block_count); - - if ($search_result != -1) { - $block_name = $search_result[2]; - } else { - $block_name = '[Malformatted]'; - } - - if (isset($this->_unicode_blocks[$block_name])) { - $this->_unicode_blocks[$block_name] += $count; - } else { - $this->_unicode_blocks[$block_name] = $count; - } - } - } - - - // trigram cleanup - if ($this->_compile_trigram) { - // pad the end - if ($b != ' ') { - if (!isset($this->_trigram["$a$b "])) { - $this->_trigram["$a$b "] = 1; - } else { - $this->_trigram["$a$b "]++; - } - } - - // perl compatibility; Language::Guess does not pad the beginning - // kludge - if (isset($dropone)) { - if ($this->_trigram[$dropone] == 1) { - unset($this->_trigram[$dropone]); - } else { - $this->_trigram[$dropone]--; - } - } - - if (!empty($this->_trigram)) { - $this->_trigram_ranks = $this->_arr_rank($this->_trigram); - } else { - $this->_trigram_ranks = array(); - } - } - } -} - -/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ - -?> diff --git a/inc/3rdparty/site_config/custom/index.php b/inc/3rdparty/site_config/custom/index.php new file mode 100644 index 00000000..a3d5f739 --- /dev/null +++ b/inc/3rdparty/site_config/custom/index.php @@ -0,0 +1,3 @@ + \ No newline at end of file -- cgit v1.2.3