X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=inc%2F3rdparty%2FmakefulltextfeedHelpers.php;h=ac872ab85910a5a0bed6a08bfb822b7b8aadf9b1;hb=90a1a78b1e2f4d40e1d9b8e6f46aca129a9d7bcf;hp=1c11b8f6b2345ea15adb93496ed97b2686a9cda6;hpb=9fad46bd0e448b312b426370770d967831e3c7f5;p=github%2Fwallabag%2Fwallabag.git diff --git a/inc/3rdparty/makefulltextfeedHelpers.php b/inc/3rdparty/makefulltextfeedHelpers.php index 1c11b8f6..ac872ab8 100755 --- a/inc/3rdparty/makefulltextfeedHelpers.php +++ b/inc/3rdparty/makefulltextfeedHelpers.php @@ -66,6 +66,38 @@ class DummySingleItem { // HELPER FUNCTIONS /////////////////////////////// +// Adapted from WordPress +// http://core.trac.wordpress.org/browser/tags/3.5.1/wp-includes/formatting.php#L2173 +function get_excerpt($text, $num_words=55, $more=null) { + if (null === $more) $more = '…'; + $text = strip_tags($text); + //TODO: Check if word count is based on single characters (East Asian characters) + /* + if (1==2) { + $text = trim(preg_replace("/[\n\r\t ]+/", ' ', $text), ' '); + preg_match_all('/./u', $text, $words_array); + $words_array = array_slice($words_array[0], 0, $num_words + 1); + $sep = ''; + } else { + $words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY); + $sep = ' '; + } + */ + $words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY); + $sep = ' '; + if (count($words_array) > $num_words) { + array_pop($words_array); + $text = implode($sep, $words_array); + $text = $text.$more; + } else { + $text = implode($sep, $words_array); + } + // trim whitespace at beginning or end of string + // See: http://stackoverflow.com/questions/4166896/trim-unicode-whitespace-in-php-5-2 + $text = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $text); + return $text; +} + function url_allowed($url) { global $options; if (!empty($options->allowed_urls)) { @@ -165,14 +197,6 @@ function convert_to_utf8($html, $header=null) if (strtolower($encoding) != 'utf-8') { debug('Converting to UTF-8'); $html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8'); - /* - if (function_exists('iconv')) { - // iconv appears to handle certain character encodings better than mb_convert_encoding - $html = iconv($encoding, 'utf-8', $html); - } else { - $html = mb_convert_encoding($html, 'utf-8', $encoding); - } - */ } } } @@ -196,7 +220,7 @@ function makeAbsolute($base, $elem) { } function makeAbsoluteAttr($base, $e, $attr) { if ($e->hasAttribute($attr)) { - // Trim leading and trailing white space. I don't really like this but + // Trim leading and trailing white space. I don't really like this but // unfortunately it does appear on some sites. e.g. $url = trim(str_replace('%20', ' ', $e->getAttribute($attr))); $url = str_replace(' ', '%20', $url); @@ -353,3 +377,13 @@ function debug($msg) { flush(); } } + +function get_base_url($dom) { + $xpath = new DOMXPath($dom); + $base_url = @$xpath->evaluate('string(//head/base/@href)', $dom); + if ($base_url !== '') { + return $base_url; + } else { + return false; + } +}