diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-05-29 12:50:28 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-05-29 12:50:28 +0200 |
commit | 87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b (patch) | |
tree | 558818975ac41403e7d55ad07c5b0ac29806e907 /inc/3rdparty/makefulltextfeedHelpers.php | |
parent | ab157bbb75ba226917145c9bf906cbf764a85cd0 (diff) | |
parent | 0b9bb8cb7868f24137c5d8b85c39cc88ea877411 (diff) | |
download | wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.tar.gz wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.tar.zst wallabag-87f01ea2e97715ac5df4ef7a6741cc26f3a5cd1b.zip |
Merge pull request #707 from mariroz/dev
update to 3.2 version of full-text-rss, issue #694
Diffstat (limited to 'inc/3rdparty/makefulltextfeedHelpers.php')
-rwxr-xr-x | inc/3rdparty/makefulltextfeedHelpers.php | 42 |
1 files changed, 33 insertions, 9 deletions
diff --git a/inc/3rdparty/makefulltextfeedHelpers.php b/inc/3rdparty/makefulltextfeedHelpers.php index 1c11b8f6..4e985372 100755 --- a/inc/3rdparty/makefulltextfeedHelpers.php +++ b/inc/3rdparty/makefulltextfeedHelpers.php | |||
@@ -66,6 +66,38 @@ class DummySingleItem { | |||
66 | // HELPER FUNCTIONS | 66 | // HELPER FUNCTIONS |
67 | /////////////////////////////// | 67 | /////////////////////////////// |
68 | 68 | ||
69 | // Adapted from WordPress | ||
70 | // http://core.trac.wordpress.org/browser/tags/3.5.1/wp-includes/formatting.php#L2173 | ||
71 | function get_excerpt($text, $num_words=55, $more=null) { | ||
72 | if (null === $more) $more = '…'; | ||
73 | $text = strip_tags($text); | ||
74 | //TODO: Check if word count is based on single characters (East Asian characters) | ||
75 | /* | ||
76 | if (1==2) { | ||
77 | $text = trim(preg_replace("/[\n\r\t ]+/", ' ', $text), ' '); | ||
78 | preg_match_all('/./u', $text, $words_array); | ||
79 | $words_array = array_slice($words_array[0], 0, $num_words + 1); | ||
80 | $sep = ''; | ||
81 | } else { | ||
82 | $words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY); | ||
83 | $sep = ' '; | ||
84 | } | ||
85 | */ | ||
86 | $words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY); | ||
87 | $sep = ' '; | ||
88 | if (count($words_array) > $num_words) { | ||
89 | array_pop($words_array); | ||
90 | $text = implode($sep, $words_array); | ||
91 | $text = $text.$more; | ||
92 | } else { | ||
93 | $text = implode($sep, $words_array); | ||
94 | } | ||
95 | // trim whitespace at beginning or end of string | ||
96 | // See: http://stackoverflow.com/questions/4166896/trim-unicode-whitespace-in-php-5-2 | ||
97 | $text = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $text); | ||
98 | return $text; | ||
99 | } | ||
100 | |||
69 | function url_allowed($url) { | 101 | function url_allowed($url) { |
70 | global $options; | 102 | global $options; |
71 | if (!empty($options->allowed_urls)) { | 103 | if (!empty($options->allowed_urls)) { |
@@ -165,14 +197,6 @@ function convert_to_utf8($html, $header=null) | |||
165 | if (strtolower($encoding) != 'utf-8') { | 197 | if (strtolower($encoding) != 'utf-8') { |
166 | debug('Converting to UTF-8'); | 198 | debug('Converting to UTF-8'); |
167 | $html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8'); | 199 | $html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8'); |
168 | /* | ||
169 | if (function_exists('iconv')) { | ||
170 | // iconv appears to handle certain character encodings better than mb_convert_encoding | ||
171 | $html = iconv($encoding, 'utf-8', $html); | ||
172 | } else { | ||
173 | $html = mb_convert_encoding($html, 'utf-8', $encoding); | ||
174 | } | ||
175 | */ | ||
176 | } | 200 | } |
177 | } | 201 | } |
178 | } | 202 | } |
@@ -196,7 +220,7 @@ function makeAbsolute($base, $elem) { | |||
196 | } | 220 | } |
197 | function makeAbsoluteAttr($base, $e, $attr) { | 221 | function makeAbsoluteAttr($base, $e, $attr) { |
198 | if ($e->hasAttribute($attr)) { | 222 | if ($e->hasAttribute($attr)) { |
199 | // Trim leading and trailing white space. I don't really like this but | 223 | // Trim leading and trailing white space. I don't really like this but |
200 | // unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" /> | 224 | // unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" /> |
201 | $url = trim(str_replace('%20', ' ', $e->getAttribute($attr))); | 225 | $url = trim(str_replace('%20', ' ', $e->getAttribute($attr))); |
202 | $url = str_replace(' ', '%20', $url); | 226 | $url = str_replace(' ', '%20', $url); |