diff options
Diffstat (limited to 'inc/3rdparty')
-rw-r--r-- | inc/3rdparty/Session.class.php | 40 | ||||
-rw-r--r-- | inc/3rdparty/class.messages.php | 3 | ||||
-rw-r--r-- | inc/3rdparty/libraries/feedwriter/FeedItem.php | 1 | ||||
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/libraries/feedwriter/FeedWriter.php | 168 | ||||
-rwxr-xr-x | inc/3rdparty/makefulltextfeed.php | 353 | ||||
-rwxr-xr-x | inc/3rdparty/makefulltextfeedHelpers.php | 355 |
6 files changed, 478 insertions, 442 deletions
diff --git a/inc/3rdparty/Session.class.php b/inc/3rdparty/Session.class.php index b30a31f3..59dfbe67 100644 --- a/inc/3rdparty/Session.class.php +++ b/inc/3rdparty/Session.class.php | |||
@@ -31,9 +31,9 @@ class Session | |||
31 | public static $sessionName = ''; | 31 | public static $sessionName = ''; |
32 | // If the user does not access any page within this time, | 32 | // If the user does not access any page within this time, |
33 | // his/her session is considered expired (3600 sec. = 1 hour) | 33 | // his/her session is considered expired (3600 sec. = 1 hour) |
34 | public static $inactivityTimeout = 86400; | 34 | public static $inactivityTimeout = 3600; |
35 | // Extra timeout for long sessions (if enabled) (82800 sec. = 23 hours) | 35 | // Extra timeout for long sessions (if enabled) (82800 sec. = 23 hours) |
36 | public static $longSessionTimeout = 31536000; | 36 | public static $longSessionTimeout = 7776000; // 7776000 = 90 days |
37 | // If you get disconnected often or if your IP address changes often. | 37 | // If you get disconnected often or if your IP address changes often. |
38 | // Let you disable session cookie hijacking protection | 38 | // Let you disable session cookie hijacking protection |
39 | public static $disableSessionProtection = false; | 39 | public static $disableSessionProtection = false; |
@@ -48,8 +48,13 @@ class Session | |||
48 | /** | 48 | /** |
49 | * Initialize session | 49 | * Initialize session |
50 | */ | 50 | */ |
51 | public static function init() | 51 | public static function init($longlastingsession = false) |
52 | { | 52 | { |
53 | //check if session name is correct | ||
54 | if ( (session_id() && !empty(self::$sessionName) && session_name()!=self::$sessionName) || $longlastingsession ) { | ||
55 | session_destroy(); | ||
56 | } | ||
57 | |||
53 | // Force cookie path (but do not change lifetime) | 58 | // Force cookie path (but do not change lifetime) |
54 | $cookie = session_get_cookie_params(); | 59 | $cookie = session_get_cookie_params(); |
55 | // Default cookie expiration and path. | 60 | // Default cookie expiration and path. |
@@ -61,12 +66,22 @@ class Session | |||
61 | if (isset($_SERVER["HTTPS"]) && $_SERVER["HTTPS"] == "on") { | 66 | if (isset($_SERVER["HTTPS"]) && $_SERVER["HTTPS"] == "on") { |
62 | $ssl = true; | 67 | $ssl = true; |
63 | } | 68 | } |
64 | session_set_cookie_params($cookie['lifetime'], $cookiedir, $_SERVER['HTTP_HOST'], $ssl); | 69 | |
70 | if ( $longlastingsession ) { | ||
71 | session_set_cookie_params(self::$longSessionTimeout, $cookiedir, null, $ssl, true); | ||
72 | } | ||
73 | else { | ||
74 | session_set_cookie_params(0, $cookiedir, null, $ssl, true); | ||
75 | } | ||
76 | //set server side valid session timeout | ||
77 | //WARNING! this may not work in shared session environment. See http://www.php.net/manual/en/session.configuration.php#ini.session.gc-maxlifetime about min value: it can be set in any application | ||
78 | ini_set('session.gc_maxlifetime', self::$longSessionTimeout); | ||
79 | |||
65 | // Use cookies to store session. | 80 | // Use cookies to store session. |
66 | ini_set('session.use_cookies', 1); | 81 | ini_set('session.use_cookies', 1); |
67 | // Force cookies for session (phpsessionID forbidden in URL) | 82 | // Force cookies for session (phpsessionID forbidden in URL) |
68 | ini_set('session.use_only_cookies', 1); | 83 | ini_set('session.use_only_cookies', 1); |
69 | if (!session_id()) { | 84 | if ( !session_id() ) { |
70 | // Prevent php to use sessionID in URL if cookies are disabled. | 85 | // Prevent php to use sessionID in URL if cookies are disabled. |
71 | ini_set('session.use_trans_sid', false); | 86 | ini_set('session.use_trans_sid', false); |
72 | if (!empty(self::$sessionName)) { | 87 | if (!empty(self::$sessionName)) { |
@@ -115,6 +130,9 @@ class Session | |||
115 | if (self::banCanLogin()) { | 130 | if (self::banCanLogin()) { |
116 | if ($login === $loginTest && $password === $passwordTest) { | 131 | if ($login === $loginTest && $password === $passwordTest) { |
117 | self::banLoginOk(); | 132 | self::banLoginOk(); |
133 | |||
134 | self::init($longlastingsession); | ||
135 | |||
118 | // Generate unique random number to sign forms (HMAC) | 136 | // Generate unique random number to sign forms (HMAC) |
119 | $_SESSION['uid'] = sha1(uniqid('', true).'_'.mt_rand()); | 137 | $_SESSION['uid'] = sha1(uniqid('', true).'_'.mt_rand()); |
120 | $_SESSION['ip'] = self::_allIPs(); | 138 | $_SESSION['ip'] = self::_allIPs(); |
@@ -135,6 +153,7 @@ class Session | |||
135 | self::banLoginFailed(); | 153 | self::banLoginFailed(); |
136 | } | 154 | } |
137 | 155 | ||
156 | self::init(); | ||
138 | return false; | 157 | return false; |
139 | } | 158 | } |
140 | 159 | ||
@@ -143,7 +162,14 @@ class Session | |||
143 | */ | 162 | */ |
144 | public static function logout() | 163 | public static function logout() |
145 | { | 164 | { |
146 | unset($_SESSION['uid'],$_SESSION['ip'],$_SESSION['expires_on'],$_SESSION['tokens'], $_SESSION['login'], $_SESSION['pass'], $_SESSION['longlastingsession'], $_SESSION['poche_user']); | 165 | // unset($_SESSION['uid'],$_SESSION['ip'],$_SESSION['expires_on'],$_SESSION['tokens'], $_SESSION['login'], $_SESSION['pass'], $_SESSION['longlastingsession'], $_SESSION['poche_user']); |
166 | |||
167 | // Destruction du cookie (le code peut paraître complexe mais c'est pour être certain de reprendre les mêmes paramètres) | ||
168 | $args = array_merge(array(session_name(), ''), array_values(session_get_cookie_params())); | ||
169 | $args[2] = time() - 3600; | ||
170 | call_user_func_array('setcookie', $args); | ||
171 | // Suppression physique de la session | ||
172 | session_destroy(); | ||
147 | } | 173 | } |
148 | 174 | ||
149 | /** | 175 | /** |
@@ -157,7 +183,7 @@ class Session | |||
157 | || (self::$disableSessionProtection === false | 183 | || (self::$disableSessionProtection === false |
158 | && $_SESSION['ip'] !== self::_allIPs()) | 184 | && $_SESSION['ip'] !== self::_allIPs()) |
159 | || time() >= $_SESSION['expires_on']) { | 185 | || time() >= $_SESSION['expires_on']) { |
160 | self::logout(); | 186 | //self::logout(); |
161 | 187 | ||
162 | return false; | 188 | return false; |
163 | } | 189 | } |
diff --git a/inc/3rdparty/class.messages.php b/inc/3rdparty/class.messages.php index e60bd3a1..27c28f43 100644 --- a/inc/3rdparty/class.messages.php +++ b/inc/3rdparty/class.messages.php | |||
@@ -59,6 +59,7 @@ class Messages { | |||
59 | $this->msgId = md5(uniqid()); | 59 | $this->msgId = md5(uniqid()); |
60 | 60 | ||
61 | // Create the session array if it doesnt already exist | 61 | // Create the session array if it doesnt already exist |
62 | settype($_SESSION, 'array'); | ||
62 | if( !array_key_exists('flash_messages', $_SESSION) ) $_SESSION['flash_messages'] = array(); | 63 | if( !array_key_exists('flash_messages', $_SESSION) ) $_SESSION['flash_messages'] = array(); |
63 | 64 | ||
64 | } | 65 | } |
@@ -228,4 +229,4 @@ class Messages { | |||
228 | 229 | ||
229 | 230 | ||
230 | } // end class | 231 | } // end class |
231 | ?> \ No newline at end of file | 232 | ?> |
diff --git a/inc/3rdparty/libraries/feedwriter/FeedItem.php b/inc/3rdparty/libraries/feedwriter/FeedItem.php index 9373deeb..0eae5e08 100644 --- a/inc/3rdparty/libraries/feedwriter/FeedItem.php +++ b/inc/3rdparty/libraries/feedwriter/FeedItem.php | |||
@@ -156,6 +156,7 @@ | |||
156 | if($this->version == RSS2 || $this->version == RSS1) | 156 | if($this->version == RSS2 || $this->version == RSS1) |
157 | { | 157 | { |
158 | $this->setElement('link', $link); | 158 | $this->setElement('link', $link); |
159 | $this->setElement('guid', $link); | ||
159 | } | 160 | } |
160 | else | 161 | else |
161 | { | 162 | { |
diff --git a/inc/3rdparty/libraries/feedwriter/FeedWriter.php b/inc/3rdparty/libraries/feedwriter/FeedWriter.php index adb2526c..5d16e765 100644..100755 --- a/inc/3rdparty/libraries/feedwriter/FeedWriter.php +++ b/inc/3rdparty/libraries/feedwriter/FeedWriter.php | |||
@@ -9,9 +9,9 @@ define('JSONP', 3, true); | |||
9 | * Genarate RSS2 or JSON (original: RSS 1.0, RSS2.0 and ATOM Feed) | 9 | * Genarate RSS2 or JSON (original: RSS 1.0, RSS2.0 and ATOM Feed) |
10 | * | 10 | * |
11 | * Modified for FiveFilters.org's Full-Text RSS project | 11 | * Modified for FiveFilters.org's Full-Text RSS project |
12 | * to allow for inclusion of hubs, JSON output. | 12 | * to allow for inclusion of hubs, JSON output. |
13 | * Stripped RSS1 and ATOM support. | 13 | * Stripped RSS1 and ATOM support. |
14 | * | 14 | * |
15 | * @package UnivarselFeedWriter | 15 | * @package UnivarselFeedWriter |
16 | * @author Anis uddin Ahmad <anisniit@gmail.com> | 16 | * @author Anis uddin Ahmad <anisniit@gmail.com> |
17 | * @link http://www.ajaxray.com/projects/rss | 17 | * @link http://www.ajaxray.com/projects/rss |
@@ -26,32 +26,32 @@ define('JSONP', 3, true); | |||
26 | private $CDATAEncoding = array(); // The tag names which have to encoded as CDATA | 26 | private $CDATAEncoding = array(); // The tag names which have to encoded as CDATA |
27 | private $xsl = null; // stylesheet to render RSS (used by Chrome) | 27 | private $xsl = null; // stylesheet to render RSS (used by Chrome) |
28 | private $json = null; // JSON object | 28 | private $json = null; // JSON object |
29 | 29 | ||
30 | private $version = null; | 30 | private $version = null; |
31 | 31 | ||
32 | /** | 32 | /** |
33 | * Constructor | 33 | * Constructor |
34 | * | 34 | * |
35 | * @param constant the version constant (RSS2 or JSON). | 35 | * @param constant the version constant (RSS2 or JSON). |
36 | */ | 36 | */ |
37 | function __construct($version = RSS2) | 37 | function __construct($version = RSS2) |
38 | { | 38 | { |
39 | $this->version = $version; | 39 | $this->version = $version; |
40 | 40 | ||
41 | // Setting default value for assential channel elements | 41 | // Setting default value for assential channel elements |
42 | $this->channels['title'] = $version . ' Feed'; | 42 | $this->channels['title'] = $version . ' Feed'; |
43 | $this->channels['link'] = 'http://www.ajaxray.com/blog'; | 43 | $this->channels['link'] = 'http://www.ajaxray.com/blog'; |
44 | 44 | ||
45 | //Tag names to encode in CDATA | 45 | //Tag names to encode in CDATA |
46 | $this->CDATAEncoding = array('description', 'content:encoded', 'content', 'subtitle', 'summary'); | 46 | $this->CDATAEncoding = array('description', 'content:encoded', 'content', 'subtitle', 'summary'); |
47 | } | 47 | } |
48 | 48 | ||
49 | public function setFormat($format) { | 49 | public function setFormat($format) { |
50 | $this->version = $format; | 50 | $this->version = $format; |
51 | } | 51 | } |
52 | 52 | ||
53 | // Start # public functions --------------------------------------------- | 53 | // Start # public functions --------------------------------------------- |
54 | 54 | ||
55 | /** | 55 | /** |
56 | * Set a channel element | 56 | * Set a channel element |
57 | * @access public | 57 | * @access public |
@@ -63,11 +63,11 @@ define('JSONP', 3, true); | |||
63 | { | 63 | { |
64 | $this->channels[$elementName] = $content ; | 64 | $this->channels[$elementName] = $content ; |
65 | } | 65 | } |
66 | 66 | ||
67 | /** | 67 | /** |
68 | * Set multiple channel elements from an array. Array elements | 68 | * Set multiple channel elements from an array. Array elements |
69 | * should be 'channelName' => 'channelContent' format. | 69 | * should be 'channelName' => 'channelContent' format. |
70 | * | 70 | * |
71 | * @access public | 71 | * @access public |
72 | * @param array array of channels | 72 | * @param array array of channels |
73 | * @return void | 73 | * @return void |
@@ -75,30 +75,30 @@ define('JSONP', 3, true); | |||
75 | public function setChannelElementsFromArray($elementArray) | 75 | public function setChannelElementsFromArray($elementArray) |
76 | { | 76 | { |
77 | if(! is_array($elementArray)) return; | 77 | if(! is_array($elementArray)) return; |
78 | foreach ($elementArray as $elementName => $content) | 78 | foreach ($elementArray as $elementName => $content) |
79 | { | 79 | { |
80 | $this->setChannelElement($elementName, $content); | 80 | $this->setChannelElement($elementName, $content); |
81 | } | 81 | } |
82 | } | 82 | } |
83 | 83 | ||
84 | /** | 84 | /** |
85 | * Genarate the actual RSS/JSON file | 85 | * Genarate the actual RSS/JSON file |
86 | * | 86 | * |
87 | * @access public | 87 | * @access public |
88 | * @return void | 88 | * @return void |
89 | */ | 89 | */ |
90 | public function genarateFeed() | 90 | public function genarateFeed() |
91 | { | 91 | { |
92 | if ($this->version == RSS2) { | 92 | if ($this->version == RSS2) { |
93 | header('Content-type: text/xml; charset=UTF-8'); | 93 | // header('Content-type: text/xml; charset=UTF-8'); |
94 | // this line prevents Chrome 20 from prompting download | 94 | // this line prevents Chrome 20 from prompting download |
95 | // used by Google: https://news.google.com/news/feeds?ned=us&topic=b&output=rss | 95 | // used by Google: https://news.google.com/news/feeds?ned=us&topic=b&output=rss |
96 | header('X-content-type-options: nosniff'); | 96 | // header('X-content-type-options: nosniff'); |
97 | } elseif ($this->version == JSON) { | 97 | } elseif ($this->version == JSON) { |
98 | header('Content-type: application/json; charset=UTF-8'); | 98 | // header('Content-type: application/json; charset=UTF-8'); |
99 | $this->json = new stdClass(); | 99 | $this->json = new stdClass(); |
100 | } elseif ($this->version == JSONP) { | 100 | } elseif ($this->version == JSONP) { |
101 | header('Content-type: application/javascript; charset=UTF-8'); | 101 | // header('Content-type: application/javascript; charset=UTF-8'); |
102 | $this->json = new stdClass(); | 102 | $this->json = new stdClass(); |
103 | } | 103 | } |
104 | $this->printHead(); | 104 | $this->printHead(); |
@@ -109,10 +109,10 @@ define('JSONP', 3, true); | |||
109 | echo json_encode($this->json); | 109 | echo json_encode($this->json); |
110 | } | 110 | } |
111 | } | 111 | } |
112 | 112 | ||
113 | /** | 113 | /** |
114 | * Create a new FeedItem. | 114 | * Create a new FeedItem. |
115 | * | 115 | * |
116 | * @access public | 116 | * @access public |
117 | * @return object instance of FeedItem class | 117 | * @return object instance of FeedItem class |
118 | */ | 118 | */ |
@@ -121,24 +121,24 @@ define('JSONP', 3, true); | |||
121 | $Item = new FeedItem($this->version); | 121 | $Item = new FeedItem($this->version); |
122 | return $Item; | 122 | return $Item; |
123 | } | 123 | } |
124 | 124 | ||
125 | /** | 125 | /** |
126 | * Add a FeedItem to the main class | 126 | * Add a FeedItem to the main class |
127 | * | 127 | * |
128 | * @access public | 128 | * @access public |
129 | * @param object instance of FeedItem class | 129 | * @param object instance of FeedItem class |
130 | * @return void | 130 | * @return void |
131 | */ | 131 | */ |
132 | public function addItem($feedItem) | 132 | public function addItem($feedItem) |
133 | { | 133 | { |
134 | $this->items[] = $feedItem; | 134 | $this->items[] = $feedItem; |
135 | } | 135 | } |
136 | 136 | ||
137 | // Wrapper functions ------------------------------------------------------------------- | 137 | // Wrapper functions ------------------------------------------------------------------- |
138 | 138 | ||
139 | /** | 139 | /** |
140 | * Set the 'title' channel element | 140 | * Set the 'title' channel element |
141 | * | 141 | * |
142 | * @access public | 142 | * @access public |
143 | * @param srting value of 'title' channel tag | 143 | * @param srting value of 'title' channel tag |
144 | * @return void | 144 | * @return void |
@@ -147,59 +147,59 @@ define('JSONP', 3, true); | |||
147 | { | 147 | { |
148 | $this->setChannelElement('title', $title); | 148 | $this->setChannelElement('title', $title); |
149 | } | 149 | } |
150 | 150 | ||
151 | /** | 151 | /** |
152 | * Add a hub to the channel element | 152 | * Add a hub to the channel element |
153 | * | 153 | * |
154 | * @access public | 154 | * @access public |
155 | * @param string URL | 155 | * @param string URL |
156 | * @return void | 156 | * @return void |
157 | */ | 157 | */ |
158 | public function addHub($hub) | 158 | public function addHub($hub) |
159 | { | 159 | { |
160 | $this->hubs[] = $hub; | 160 | $this->hubs[] = $hub; |
161 | } | 161 | } |
162 | 162 | ||
163 | /** | 163 | /** |
164 | * Set XSL URL | 164 | * Set XSL URL |
165 | * | 165 | * |
166 | * @access public | 166 | * @access public |
167 | * @param string URL | 167 | * @param string URL |
168 | * @return void | 168 | * @return void |
169 | */ | 169 | */ |
170 | public function setXsl($xsl) | 170 | public function setXsl($xsl) |
171 | { | 171 | { |
172 | $this->xsl = $xsl; | 172 | $this->xsl = $xsl; |
173 | } | 173 | } |
174 | 174 | ||
175 | /** | 175 | /** |
176 | * Set self URL | 176 | * Set self URL |
177 | * | 177 | * |
178 | * @access public | 178 | * @access public |
179 | * @param string URL | 179 | * @param string URL |
180 | * @return void | 180 | * @return void |
181 | */ | 181 | */ |
182 | public function setSelf($self) | 182 | public function setSelf($self) |
183 | { | 183 | { |
184 | $this->self = $self; | 184 | $this->self = $self; |
185 | } | 185 | } |
186 | 186 | ||
187 | /** | 187 | /** |
188 | * Set the 'description' channel element | 188 | * Set the 'description' channel element |
189 | * | 189 | * |
190 | * @access public | 190 | * @access public |
191 | * @param srting value of 'description' channel tag | 191 | * @param srting value of 'description' channel tag |
192 | * @return void | 192 | * @return void |
193 | */ | 193 | */ |
194 | public function setDescription($desciption) | 194 | public function setDescription($desciption) |
195 | { | 195 | { |
196 | $tag = ($this->version == ATOM)? 'subtitle' : 'description'; | 196 | $tag = ($this->version == ATOM)? 'subtitle' : 'description'; |
197 | $this->setChannelElement($tag, $desciption); | 197 | $this->setChannelElement($tag, $desciption); |
198 | } | 198 | } |
199 | 199 | ||
200 | /** | 200 | /** |
201 | * Set the 'link' channel element | 201 | * Set the 'link' channel element |
202 | * | 202 | * |
203 | * @access public | 203 | * @access public |
204 | * @param srting value of 'link' channel tag | 204 | * @param srting value of 'link' channel tag |
205 | * @return void | 205 | * @return void |
@@ -208,10 +208,10 @@ define('JSONP', 3, true); | |||
208 | { | 208 | { |
209 | $this->setChannelElement('link', $link); | 209 | $this->setChannelElement('link', $link); |
210 | } | 210 | } |
211 | 211 | ||
212 | /** | 212 | /** |
213 | * Set the 'image' channel element | 213 | * Set the 'image' channel element |
214 | * | 214 | * |
215 | * @access public | 215 | * @access public |
216 | * @param srting title of image | 216 | * @param srting title of image |
217 | * @param srting link url of the imahe | 217 | * @param srting link url of the imahe |
@@ -222,14 +222,14 @@ define('JSONP', 3, true); | |||
222 | { | 222 | { |
223 | $this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url)); | 223 | $this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url)); |
224 | } | 224 | } |
225 | 225 | ||
226 | // End # public functions ---------------------------------------------- | 226 | // End # public functions ---------------------------------------------- |
227 | 227 | ||
228 | // Start # private functions ---------------------------------------------- | 228 | // Start # private functions ---------------------------------------------- |
229 | 229 | ||
230 | /** | 230 | /** |
231 | * Prints the xml and rss namespace | 231 | * Prints the xml and rss namespace |
232 | * | 232 | * |
233 | * @access private | 233 | * @access private |
234 | * @return void | 234 | * @return void |
235 | */ | 235 | */ |
@@ -247,10 +247,10 @@ define('JSONP', 3, true); | |||
247 | $this->json->rss = array('@attributes' => array('version' => '2.0')); | 247 | $this->json->rss = array('@attributes' => array('version' => '2.0')); |
248 | } | 248 | } |
249 | } | 249 | } |
250 | 250 | ||
251 | /** | 251 | /** |
252 | * Closes the open tags at the end of file | 252 | * Closes the open tags at the end of file |
253 | * | 253 | * |
254 | * @access private | 254 | * @access private |
255 | * @return void | 255 | * @return void |
256 | */ | 256 | */ |
@@ -258,14 +258,14 @@ define('JSONP', 3, true); | |||
258 | { | 258 | { |
259 | if ($this->version == RSS2) | 259 | if ($this->version == RSS2) |
260 | { | 260 | { |
261 | echo '</channel>',PHP_EOL,'</rss>'; | 261 | echo '</channel>',PHP_EOL,'</rss>'; |
262 | } | 262 | } |
263 | // do nothing for JSON | 263 | // do nothing for JSON |
264 | } | 264 | } |
265 | 265 | ||
266 | /** | 266 | /** |
267 | * Creates a single node as xml format | 267 | * Creates a single node as xml format |
268 | * | 268 | * |
269 | * @access private | 269 | * @access private |
270 | * @param string name of the tag | 270 | * @param string name of the tag |
271 | * @param mixed tag value as string or array of nested tags in 'tagName' => 'tagValue' format | 271 | * @param mixed tag value as string or array of nested tags in 'tagName' => 'tagValue' format |
@@ -273,22 +273,22 @@ define('JSONP', 3, true); | |||
273 | * @return string formatted xml tag | 273 | * @return string formatted xml tag |
274 | */ | 274 | */ |
275 | private function makeNode($tagName, $tagContent, $attributes = null) | 275 | private function makeNode($tagName, $tagContent, $attributes = null) |
276 | { | 276 | { |
277 | if ($this->version == RSS2) | 277 | if ($this->version == RSS2) |
278 | { | 278 | { |
279 | $nodeText = ''; | 279 | $nodeText = ''; |
280 | $attrText = ''; | 280 | $attrText = ''; |
281 | if (is_array($attributes)) | 281 | if (is_array($attributes)) |
282 | { | 282 | { |
283 | foreach ($attributes as $key => $value) | 283 | foreach ($attributes as $key => $value) |
284 | { | 284 | { |
285 | $attrText .= " $key=\"$value\" "; | 285 | $attrText .= " $key=\"$value\" "; |
286 | } | 286 | } |
287 | } | 287 | } |
288 | $nodeText .= "<{$tagName}{$attrText}>"; | 288 | $nodeText .= "<{$tagName}{$attrText}>"; |
289 | if (is_array($tagContent)) | 289 | if (is_array($tagContent)) |
290 | { | 290 | { |
291 | foreach ($tagContent as $key => $value) | 291 | foreach ($tagContent as $key => $value) |
292 | { | 292 | { |
293 | $nodeText .= $this->makeNode($key, $value); | 293 | $nodeText .= $this->makeNode($key, $value); |
294 | } | 294 | } |
@@ -297,7 +297,7 @@ define('JSONP', 3, true); | |||
297 | { | 297 | { |
298 | //$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent); | 298 | //$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent); |
299 | $nodeText .= htmlspecialchars($tagContent); | 299 | $nodeText .= htmlspecialchars($tagContent); |
300 | } | 300 | } |
301 | //$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>"; | 301 | //$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>"; |
302 | $nodeText .= "</$tagName>"; | 302 | $nodeText .= "</$tagName>"; |
303 | return $nodeText . PHP_EOL; | 303 | return $nodeText . PHP_EOL; |
@@ -321,7 +321,7 @@ define('JSONP', 3, true); | |||
321 | } | 321 | } |
322 | return ''; // should not get here | 322 | return ''; // should not get here |
323 | } | 323 | } |
324 | 324 | ||
325 | private function json_keys(array $array) { | 325 | private function json_keys(array $array) { |
326 | $new = array(); | 326 | $new = array(); |
327 | foreach ($array as $key => $val) { | 327 | foreach ($array as $key => $val) { |
@@ -334,7 +334,7 @@ define('JSONP', 3, true); | |||
334 | } | 334 | } |
335 | return $new; | 335 | return $new; |
336 | } | 336 | } |
337 | 337 | ||
338 | /** | 338 | /** |
339 | * @desc Print channels | 339 | * @desc Print channels |
340 | * @access private | 340 | * @access private |
@@ -344,7 +344,7 @@ define('JSONP', 3, true); | |||
344 | { | 344 | { |
345 | //Start channel tag | 345 | //Start channel tag |
346 | if ($this->version == RSS2) { | 346 | if ($this->version == RSS2) { |
347 | echo '<channel>' . PHP_EOL; | 347 | echo '<channel>' . PHP_EOL; |
348 | // add hubs | 348 | // add hubs |
349 | foreach ($this->hubs as $hub) { | 349 | foreach ($this->hubs as $hub) { |
350 | //echo $this->makeNode('link', '', array('rel'=>'hub', 'href'=>$hub, 'xmlns'=>'http://www.w3.org/2005/Atom')); | 350 | //echo $this->makeNode('link', '', array('rel'=>'hub', 'href'=>$hub, 'xmlns'=>'http://www.w3.org/2005/Atom')); |
@@ -356,7 +356,7 @@ define('JSONP', 3, true); | |||
356 | echo '<link rel="self" href="'.htmlspecialchars($this->self).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL; | 356 | echo '<link rel="self" href="'.htmlspecialchars($this->self).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL; |
357 | } | 357 | } |
358 | //Print Items of channel | 358 | //Print Items of channel |
359 | foreach ($this->channels as $key => $value) | 359 | foreach ($this->channels as $key => $value) |
360 | { | 360 | { |
361 | echo $this->makeNode($key, $value); | 361 | echo $this->makeNode($key, $value); |
362 | } | 362 | } |
@@ -364,26 +364,26 @@ define('JSONP', 3, true); | |||
364 | $this->json->rss['channel'] = (object)$this->json_keys($this->channels); | 364 | $this->json->rss['channel'] = (object)$this->json_keys($this->channels); |
365 | } | 365 | } |
366 | } | 366 | } |
367 | 367 | ||
368 | /** | 368 | /** |
369 | * Prints formatted feed items | 369 | * Prints formatted feed items |
370 | * | 370 | * |
371 | * @access private | 371 | * @access private |
372 | * @return void | 372 | * @return void |
373 | */ | 373 | */ |
374 | private function printItems() | 374 | private function printItems() |
375 | { | 375 | { |
376 | foreach ($this->items as $item) { | 376 | foreach ($this->items as $item) { |
377 | $itemElements = $item->getElements(); | 377 | $itemElements = $item->getElements(); |
378 | 378 | ||
379 | echo $this->startItem(); | 379 | echo $this->startItem(); |
380 | 380 | ||
381 | if ($this->version == JSON || $this->version == JSONP) { | 381 | if ($this->version == JSON || $this->version == JSONP) { |
382 | $json_item = array(); | 382 | $json_item = array(); |
383 | } | 383 | } |
384 | 384 | ||
385 | foreach ($itemElements as $thisElement) { | 385 | foreach ($itemElements as $thisElement) { |
386 | foreach ($thisElement as $instance) { | 386 | foreach ($thisElement as $instance) { |
387 | if ($this->version == RSS2) { | 387 | if ($this->version == RSS2) { |
388 | echo $this->makeNode($instance['name'], $instance['content'], $instance['attributes']); | 388 | echo $this->makeNode($instance['name'], $instance['content'], $instance['attributes']); |
389 | } elseif ($this->version == JSON || $this->version == JSONP) { | 389 | } elseif ($this->version == JSON || $this->version == JSONP) { |
@@ -406,10 +406,10 @@ define('JSONP', 3, true); | |||
406 | } | 406 | } |
407 | } | 407 | } |
408 | } | 408 | } |
409 | 409 | ||
410 | /** | 410 | /** |
411 | * Make the starting tag of channels | 411 | * Make the starting tag of channels |
412 | * | 412 | * |
413 | * @access private | 413 | * @access private |
414 | * @return void | 414 | * @return void |
415 | */ | 415 | */ |
@@ -417,14 +417,14 @@ define('JSONP', 3, true); | |||
417 | { | 417 | { |
418 | if ($this->version == RSS2) | 418 | if ($this->version == RSS2) |
419 | { | 419 | { |
420 | echo '<item>' . PHP_EOL; | 420 | echo '<item>' . PHP_EOL; |
421 | } | 421 | } |
422 | // nothing for JSON | 422 | // nothing for JSON |
423 | } | 423 | } |
424 | 424 | ||
425 | /** | 425 | /** |
426 | * Closes feed item tag | 426 | * Closes feed item tag |
427 | * | 427 | * |
428 | * @access private | 428 | * @access private |
429 | * @return void | 429 | * @return void |
430 | */ | 430 | */ |
@@ -432,10 +432,10 @@ define('JSONP', 3, true); | |||
432 | { | 432 | { |
433 | if ($this->version == RSS2) | 433 | if ($this->version == RSS2) |
434 | { | 434 | { |
435 | echo '</item>' . PHP_EOL; | 435 | echo '</item>' . PHP_EOL; |
436 | } | 436 | } |
437 | // nothing for JSON | 437 | // nothing for JSON |
438 | } | 438 | } |
439 | 439 | ||
440 | // End # private functions ---------------------------------------------- | 440 | // End # private functions ---------------------------------------------- |
441 | } \ No newline at end of file | 441 | } \ No newline at end of file |
diff --git a/inc/3rdparty/makefulltextfeed.php b/inc/3rdparty/makefulltextfeed.php index 2852c4c2..135964f1 100755 --- a/inc/3rdparty/makefulltextfeed.php +++ b/inc/3rdparty/makefulltextfeed.php | |||
@@ -55,42 +55,8 @@ if (get_magic_quotes_gpc()) { | |||
55 | 55 | ||
56 | // set include path | 56 | // set include path |
57 | set_include_path(realpath(dirname(__FILE__).'/libraries').PATH_SEPARATOR.get_include_path()); | 57 | set_include_path(realpath(dirname(__FILE__).'/libraries').PATH_SEPARATOR.get_include_path()); |
58 | // Autoloading of classes allows us to include files only when they're | 58 | |
59 | // needed. If we've got a cached copy, for example, only Zend_Cache is loaded. | 59 | require_once dirname(__FILE__).'/makefulltextfeedHelpers.php'; |
60 | function autoload($class_name) { | ||
61 | static $dir = null; | ||
62 | if ($dir === null) $dir = dirname(__FILE__).'/libraries/'; | ||
63 | static $mapping = array( | ||
64 | // Include FeedCreator for RSS/Atom creation | ||
65 | 'FeedWriter' => 'feedwriter/FeedWriter.php', | ||
66 | 'FeedItem' => 'feedwriter/FeedItem.php', | ||
67 | // Include ContentExtractor and Readability for identifying and extracting content from URLs | ||
68 | 'ContentExtractor' => 'content-extractor/ContentExtractor.php', | ||
69 | 'SiteConfig' => 'content-extractor/SiteConfig.php', | ||
70 | 'Readability' => 'readability/Readability.php', | ||
71 | // Include Humble HTTP Agent to allow parallel requests and response caching | ||
72 | 'HumbleHttpAgent' => 'humble-http-agent/HumbleHttpAgent.php', | ||
73 | 'SimplePie_HumbleHttpAgent' => 'humble-http-agent/SimplePie_HumbleHttpAgent.php', | ||
74 | 'CookieJar' => 'humble-http-agent/CookieJar.php', | ||
75 | // Include Zend Cache to improve performance (cache results) | ||
76 | 'Zend_Cache' => 'Zend/Cache.php', | ||
77 | // Language detect | ||
78 | 'Text_LanguageDetect' => 'language-detect/LanguageDetect.php', | ||
79 | // HTML5 Lib | ||
80 | 'HTML5_Parser' => 'html5/Parser.php', | ||
81 | // htmLawed - used if XSS filter is enabled (xss_filter) | ||
82 | 'htmLawed' => 'htmLawed/htmLawed.php' | ||
83 | ); | ||
84 | if (isset($mapping[$class_name])) { | ||
85 | debug("** Loading class $class_name ({$mapping[$class_name]})"); | ||
86 | require $dir.$mapping[$class_name]; | ||
87 | return true; | ||
88 | } else { | ||
89 | return false; | ||
90 | } | ||
91 | } | ||
92 | spl_autoload_register('autoload'); | ||
93 | require dirname(__FILE__).'/libraries/simplepie/autoloader.php'; | ||
94 | 60 | ||
95 | //////////////////////////////// | 61 | //////////////////////////////// |
96 | // Load config file | 62 | // Load config file |
@@ -415,6 +381,7 @@ if (!$debug_mode) { | |||
415 | ////////////////////////////////// | 381 | ////////////////////////////////// |
416 | // Set up HTTP agent | 382 | // Set up HTTP agent |
417 | ////////////////////////////////// | 383 | ////////////////////////////////// |
384 | global $http; | ||
418 | $http = new HumbleHttpAgent(); | 385 | $http = new HumbleHttpAgent(); |
419 | $http->debug = $debug_mode; | 386 | $http->debug = $debug_mode; |
420 | $http->userAgentMap = $options->user_agents; | 387 | $http->userAgentMap = $options->user_agents; |
@@ -478,29 +445,6 @@ if ($html_only || !$result) { | |||
478 | $isDummyFeed = true; | 445 | $isDummyFeed = true; |
479 | unset($feed, $result); | 446 | unset($feed, $result); |
480 | // create single item dummy feed object | 447 | // create single item dummy feed object |
/**
 * Minimal feed-like wrapper around a single URL.
 *
 * Exposes the feed accessors used by the caller (title, description,
 * link, language, image, items) and always reports exactly one item.
 */
class DummySingleItemFeed {
	/** @var DummySingleItem The only item this feed contains. */
	public $item;

	/**
	 * @param string $url URL the single item points at.
	 */
	public function __construct($url) {
		$this->item = new DummySingleItem($url);
	}

	/** @return string Always an empty title. */
	public function get_title() {
		return '';
	}

	/** @return string Fixed description naming the item's URL. */
	public function get_description() {
		return 'Content extracted from '.$this->item->url;
	}

	/** @return string The item's URL. */
	public function get_link() {
		return $this->item->url;
	}

	/** @return bool Always false (no language information). */
	public function get_language() {
		return false;
	}

	/** @return bool Always false (no feed image). */
	public function get_image_url() {
		return false;
	}

	/**
	 * The arguments are accepted for signature compatibility and ignored.
	 *
	 * @return array One-element list holding the single item.
	 */
	public function get_items($start=0, $max=1) {
		return array($this->item);
	}
}
/**
 * Minimal feed-item stand-in that only knows its own URL.
 *
 * Every accessor other than get_permalink() returns a fixed "nothing
 * available" value (null, false or an empty string).
 */
class DummySingleItem {
	/** @var string URL this item represents. */
	public $url;

	/**
	 * @param string $url URL this item represents.
	 */
	public function __construct($url) {
		$this->url = $url;
	}

	/** @return string The item's URL. */
	public function get_permalink() {
		return $this->url;
	}

	/** @return null No title available. */
	public function get_title() {
		return null;
	}

	/** @return bool Always false (no date available). */
	public function get_date($format='') {
		return false;
	}

	/** @return null No author available. */
	public function get_author($key=0) {
		return null;
	}

	/** @return null No authors available. */
	public function get_authors() {
		return null;
	}

	/** @return string Always an empty description. */
	public function get_description() {
		return '';
	}

	/** @return null No enclosure available. */
	public function get_enclosure($key=0, $prefer=null) {
		return null;
	}

	/** @return null No enclosures available. */
	public function get_enclosures() {
		return null;
	}

	/** @return null No categories available. */
	public function get_categories() {
		return null;
	}
}
504 | $feed = new DummySingleItemFeed($url); | 448 | $feed = new DummySingleItemFeed($url); |
505 | } | 449 | } |
506 | 450 | ||
@@ -903,294 +847,3 @@ if (!$debug_mode) { | |||
903 | if ($callback) echo ');'; | 847 | if ($callback) echo ');'; |
904 | } | 848 | } |
905 | 849 | ||
906 | /////////////////////////////// | ||
907 | // HELPER FUNCTIONS | ||
908 | /////////////////////////////// | ||
909 | |||
/**
 * Decide whether a URL may be processed, based on the configured lists.
 *
 * When $options->allowed_urls is non-empty it acts as a whitelist and
 * the blocklist is ignored; otherwise $options->blocked_urls acts as a
 * blacklist. Matching is a case-insensitive substring test.
 *
 * @param string $url URL to check.
 * @return bool True when the URL is permitted.
 */
function url_allowed($url) {
	global $options;
	if (!empty($options->allowed_urls)) {
		foreach ($options->allowed_urls as $allowurl) {
			$fragment = (string) $allowurl;
			// An empty entry would match every URL, so it is ignored.
			if ($fragment !== '' && stripos($url, $fragment) !== false) {
				return true;
			}
		}
		return false;
	}
	// A missing or empty blocklist simply blocks nothing.
	if (!empty($options->blocked_urls)) {
		foreach ($options->blocked_urls as $blockurl) {
			$fragment = (string) $blockurl;
			if ($fragment !== '' && stripos($url, $fragment) !== false) {
				return false;
			}
		}
	}
	return true;
}
930 | |||
931 | ////////////////////////////////////////////// | ||
932 | // Convert $html to UTF8 | ||
933 | // (uses HTTP headers and HTML to find encoding) | ||
934 | // adapted from http://stackoverflow.com/questions/910793/php-detect-encoding-and-make-everything-utf-8 | ||
935 | ////////////////////////////////////////////// | ||
/**
 * Convert an HTML document to UTF-8.
 *
 * The source encoding is taken from the last Content-Type header found
 * in $header. When that yields nothing usable, the first 50000 bytes of
 * $html are searched (XML declaration first, then <meta> tags).
 *
 * @param string            $html   Raw document body.
 * @param string|array|null $header HTTP response headers; an array is joined with newlines.
 * @return string The document, converted when a non-UTF-8 encoding was detected.
 */
function convert_to_utf8($html, $header=null)
{
	if (!$html && !$header) {
		return $html;
	}
	$encoding = null;
	if (is_array($header)) $header = implode("\n", $header);
	if (!$header || !preg_match_all('/^Content-Type:\s+([^;]+)(?:;\s*charset=["\']?([^;"\'\n]*))?/im', $header, $match, PREG_SET_ORDER)) {
		// error parsing the response
		debug('Could not find Content-Type header in HTTP response');
	} else {
		$match = end($match); // get last matched element (in case of redirects)
		if (isset($match[2])) $encoding = trim($match[2], "\"' \r\n\0\x0B\t");
	}
	// TODO: check to see if encoding is supported (can we convert it?)
	// If it's not, result will be empty string.
	// Some sites send an invalid charset such as 'none'
	// (problem URL: http://facta.co.jp/blog/archives/20111026001026.html).
	// Drop it, in any letter case, so it can never reach the converter.
	if ($encoding && strtolower($encoding) == 'none') {
		$encoding = null;
	}
	if (!$encoding) {
		// search for encoding in HTML - only look at the first 50000 characters
		// Why 50000? See, for example, http://www.lemonde.fr/festival-de-cannes/article/2012/05/23/deux-cretes-en-goguette-sur-la-croisette_1705732_766360.html
		// TODO: improve this so it looks at smaller chunks first
		$html_head = substr($html, 0, 50000);
		if (preg_match('/^<\?xml\s+version=(?:"[^"]*"|\'[^\']*\')\s+encoding=("[^"]*"|\'[^\']*\')/s', $html_head, $match)) {
			$encoding = trim($match[1], '"\'');
		} elseif (preg_match('/<meta\s+http-equiv=["\']?Content-Type["\']? content=["\'][^;]+;\s*charset=["\']?([^;"\'>]+)/i', $html_head, $match)) {
			$encoding = trim($match[1]);
		} elseif (preg_match_all('/<meta\s+([^>]+)>/i', $html_head, $match)) {
			foreach ($match[1] as $_test) {
				if (preg_match('/charset=["\']?([^"\']+)/i', $_test, $_m)) {
					$encoding = trim($_m[1]);
					break;
				}
			}
		}
	}
	// trim is important here!
	if (isset($encoding)) $encoding = trim($encoding);

	if ($encoding) {
		$fix_smart_quotes = (strtolower($encoding) == 'iso-8859-1');
	} else {
		// With no declared encoding the document is treated as UTF-8 below.
		// Bytes 0x82-0x9F also occur inside valid UTF-8 sequences, so the
		// byte-level replacement is only safe when the text is not valid UTF-8.
		$fix_smart_quotes = (preg_match('//u', $html) !== 1);
	}
	if ($fix_smart_quotes) {
		// Replace MS Word smart quotes and related single-byte characters.
		// The replacements are ASCII-only HTML entities so that the charset
		// conversion further down cannot re-encode them.
		$trans = array(
			chr(130) => '&sbquo;',  // Single Low-9 Quotation Mark
			chr(131) => '&fnof;',   // Latin Small Letter F With Hook
			chr(132) => '&bdquo;',  // Double Low-9 Quotation Mark
			chr(133) => '&hellip;', // Horizontal Ellipsis
			chr(134) => '&dagger;', // Dagger
			chr(135) => '&Dagger;', // Double Dagger
			chr(136) => '&circ;',   // Modifier Letter Circumflex Accent
			chr(137) => '&permil;', // Per Mille Sign
			chr(138) => '&Scaron;', // Latin Capital Letter S With Caron
			chr(139) => '&lsaquo;', // Single Left-Pointing Angle Quotation Mark
			chr(140) => '&OElig;',  // Latin Capital Ligature OE
			chr(145) => '&lsquo;',  // Left Single Quotation Mark
			chr(146) => '&rsquo;',  // Right Single Quotation Mark
			chr(147) => '&ldquo;',  // Left Double Quotation Mark
			chr(148) => '&rdquo;',  // Right Double Quotation Mark
			chr(149) => '&bull;',   // Bullet
			chr(150) => '&ndash;',  // En Dash
			chr(151) => '&mdash;',  // Em Dash
			chr(152) => '&tilde;',  // Small Tilde
			chr(153) => '&trade;',  // Trade Mark Sign
			chr(154) => '&scaron;', // Latin Small Letter S With Caron
			chr(155) => '&rsaquo;', // Single Right-Pointing Angle Quotation Mark
			chr(156) => '&oelig;',  // Latin Small Ligature OE
			chr(159) => '&Yuml;',   // Latin Capital Letter Y With Diaeresis
		);
		$html = strtr($html, $trans);
	}

	if (!$encoding) {
		debug('No character encoding found, so treating as UTF-8');
		$encoding = 'utf-8';
	} else {
		debug('Character encoding: '.$encoding);
		if (strtolower($encoding) != 'utf-8') {
			debug('Converting to UTF-8');
			$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
		}
	}
	return $html;
}
1022 | |||
/**
 * Rewrite the href of every <a> and the src of every <img> inside
 * $elem (and on $elem itself when it is one of those tags) so they are
 * absolute with respect to $base.
 *
 * @param string     $base Base URL to resolve against.
 * @param DOMElement $elem Element whose subtree is processed in place.
 * @return void
 */
function makeAbsolute($base, $elem) {
	$base = new SimplePie_IRI($base);
	// remove '//' in URL path (used to prevent URLs from resolving properly)
	// TODO: check if this is still the case
	if (isset($base->path)) {
		$base->path = preg_replace('!//+!', '/', $base->path);
	}
	$url_attributes = array('a'=>'href', 'img'=>'src');
	foreach ($url_attributes as $tag => $attr) {
		$nodes = $elem->getElementsByTagName($tag);
		// Walk the node list from last to first.
		$index = $nodes->length;
		while ($index-- > 0) {
			makeAbsoluteAttr($base, $nodes->item($index), $attr);
		}
		// getElementsByTagName() only returns descendants, so handle the root too.
		if (strtolower($elem->tagName) == $tag) {
			makeAbsoluteAttr($base, $elem, $attr);
		}
	}
}
/**
 * Make one URL-carrying attribute of an element absolute.
 *
 * Nothing happens when the attribute is missing, when its value already
 * starts with http:// or https://, or when resolution fails.
 *
 * @param SimplePie_IRI|string $base Base to resolve against (passed to SimplePie_IRI::absolutize()).
 * @param DOMElement           $e    Element to update in place.
 * @param string               $attr Attribute name, e.g. 'href' or 'src'.
 * @return void
 */
function makeAbsoluteAttr($base, $e, $attr) {
	if (!$e->hasAttribute($attr)) {
		return;
	}
	// Trim leading and trailing white space. I don't really like this but
	// unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" />
	$url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
	$url = str_replace(' ', '%20', $url);
	// Anchored on purpose: a relative URL that merely contains "http://"
	// later on (e.g. in its query string) still has to be resolved.
	if (preg_match('!^https?://!i', $url)) {
		return;
	}
	if ($absolute = SimplePie_IRI::absolutize($base, $url)) {
		$e->setAttribute($attr, $absolute);
	}
}
/**
 * Resolve a URL string against a base URL.
 *
 * @param string $base Base URL.
 * @param string $url  URL to resolve; returned untouched when it already
 *                     starts with http:// or https://.
 * @return string|false The absolute URL, or false when resolution fails.
 */
function makeAbsoluteStr($base, $url) {
	$base = new SimplePie_IRI($base);
	// remove '//' in URL path (causes URLs not to resolve properly)
	if (isset($base->path)) {
		$base->path = preg_replace('!//+!', '/', $base->path);
	}
	// already absolute
	if (preg_match('!^https?://!i', $url)) {
		return $url;
	}
	$resolved = SimplePie_IRI::absolutize($base, $url);
	return $resolved ? $resolved : false;
}
1065 | // returns single page response, or false if not found | ||
/**
 * Look for a "single page" version of an article and fetch it.
 *
 * The site config built for $url supplies XPath expressions, either
 * aimed at the page HTML (single_page_link) or at the feed item
 * description (single_page_link_in_feed). The first expression that
 * yields a URL wins.
 *
 * @param object $item Feed item; only get_description() is called on it.
 * @param string $html HTML of the page already fetched for $url.
 * @param string $url  URL of that page.
 * @return array|false The HTTP response for the single-page URL, or false
 *                     when no usable link was found or the fetch failed.
 */
function getSinglePage($item, $html, $url) {
	global $http, $extractor;
	debug('Looking for site config files to see if single page link exists');
	$site_config = $extractor->buildSiteConfig($url, $html);
	if (!empty($site_config->single_page_link)) {
		$link_patterns = $site_config->single_page_link;
	} elseif (!empty($site_config->single_page_link_in_feed)) {
		// single page link xpath is targeted at feed,
		// so replace the HTML with the feed item description
		$link_patterns = $site_config->single_page_link_in_feed;
		$html = $item->get_description();
	} else {
		return false;
	}

	// Build DOM tree from HTML
	$readability = new Readability($html, $url);
	$xpath = new DOMXPath($readability->dom);

	// Loop through the single page link xpath expressions
	$single_page_url = null;
	foreach ($link_patterns as $pattern) {
		$result = @$xpath->evaluate($pattern, $readability->dom);
		if (is_string($result)) {
			$single_page_url = trim($result);
			break;
		}
		if (!($result instanceof DOMNodeList) || !($result->length > 0)) {
			continue;
		}
		foreach ($result as $node) {
			if ($node instanceof DOMElement && $node->hasAttribute('href')) {
				$single_page_url = $node->getAttribute('href');
				break 2;
			}
			if ($node instanceof DOMAttr && $node->value) {
				$single_page_url = $node->value;
				break 2;
			}
		}
	}
	if (!isset($single_page_url)) {
		return false;
	}

	// Resolve against $url and make sure it's not the page we already have
	$single_page_url = makeAbsoluteStr($url, $single_page_url);
	if (!$single_page_url || $single_page_url == $url) {
		return false;
	}

	// Fetch it with the referer temporarily pointing at the target itself
	$previous_referer = $http->referer;
	$http->referer = $single_page_url;
	$response = $http->get($single_page_url, true);
	$http->referer = $previous_referer;
	if ($response && $response['status_code'] < 300) {
		return $response;
	}
	return false;
}
1119 | |||
1120 | // based on content-type http header, decide what to do | ||
1121 | // param: HTTP headers string | ||
1122 | // return: array with keys: 'mime', 'type', 'subtype', 'action', 'name' | ||
1123 | // e.g. array('mime'=>'image/jpeg', 'type'=>'image', 'subtype'=>'jpeg', 'action'=>'link', 'name'=>'Image') | ||
/**
 * Work out what to do with a response based on its Content-Type header.
 *
 * @param string $headers Raw HTTP headers.
 * @return array Empty when no Content-Type header is found; otherwise
 *               'mime', 'type' and 'subtype' (all lower-cased), plus
 *               'action' and 'name' when $options->content_type_exc has
 *               an entry for the full mime type or, failing that, the type.
 */
function get_mime_action_info($headers) {
	global $options;
	if (!preg_match('!^Content-Type:\s*(([-\w]+)/([-\w\+]+))!im', $headers, $parts)) {
		return array();
	}
	// $parts[1] = full mime type (image/jpeg), [2] = type (image), [3] = subtype (jpeg)
	$info = array(
		'mime' => strtolower(trim($parts[1])),
		'type' => strtolower(trim($parts[2])),
		'subtype' => strtolower(trim($parts[3])),
	);
	// The full mime type takes precedence over the bare type.
	foreach (array('mime', 'type') as $key) {
		$candidate = $info[$key];
		if (isset($options->content_type_exc[$candidate])) {
			$rule = $options->content_type_exc[$candidate];
			$info['action'] = $rule['action'];
			$info['name'] = $rule['name'];
			break;
		}
	}
	return $info;
}
1146 | |||
/**
 * Strip Google Analytics (utm_*) parameters from a URL.
 *
 * The remaining query string stays well formed: the parameter that
 * follows a removed leading one keeps the '?', and a '?' or '&' left
 * dangling at the end by the removal is trimmed.
 *
 * regex adapted from http://navitronic.co.uk/2010/12/removing-google-analytics-cruft-from-urls/
 * https://gist.github.com/758177
 *
 * @param string $url URL to clean.
 * @return string The URL without utm_* parameters.
 */
function remove_url_cruft($url) {
	// Remove each "utm_x=value" together with the separator that follows it,
	// keeping the '?' or '&' in front for whatever parameter comes next.
	$cleaned = preg_replace('/(?<=[?&])utm_[a-z]+=[^&]+(?:&|$)/', '', $url, -1, $removed);
	if (!$removed) {
		return $url;
	}
	return rtrim($cleaned, '?&');
}
1153 | |||
/**
 * Fill the {url} and {effective-url} placeholders in a template string.
 *
 * {url} becomes the HTML-escaped permalink of the global $item and
 * {effective-url} the HTML-escaped global $effective_url.
 *
 * @param string $string Template text.
 * @return string The text with both placeholders replaced; an empty
 *                input is returned as is.
 */
function make_substitutions($string) {
	if ($string == '') {
		return $string;
	}
	global $item, $effective_url;
	$placeholders = array('{url}', '{effective-url}');
	$values = array(
		htmlspecialchars($item->get_permalink()),
		htmlspecialchars($effective_url),
	);
	// str_replace() applies the pairs one after another, in order.
	return str_replace($placeholders, $values, $string);
}
1161 | |||
/**
 * Return the shared Zend file cache, creating it on first use.
 *
 * The cache directory is $options->cache_dir.'/rss-with-key/' when the
 * global $valid_key is truthy and $options->cache_dir.'/rss/' otherwise.
 *
 * @return object The cache object built by Zend_Cache::factory().
 */
function get_cache() {
	global $options, $valid_key;
	static $cache = null;
	if ($cache !== null) {
		return $cache;
	}
	$subdirectory = $valid_key ? '/rss-with-key/' : '/rss/';
	$frontendOptions = array(
		'lifetime' => 10*60, // cache lifetime of 10 minutes
		'automatic_serialization' => false,
		'write_control' => false,
		'automatic_cleaning_factor' => $options->cache_cleanup,
		'ignore_user_abort' => false
	);
	$backendOptions = array(
		'cache_dir' => $options->cache_dir.$subdirectory, // directory where to put the cache files
		'file_locking' => false,
		'read_control' => true,
		'read_control_type' => 'strlen',
		'hashed_directory_level' => $options->cache_directory_level,
		'hashed_directory_perm' => 0777,
		'cache_file_perm' => 0664,
		'file_name_prefix' => 'ff'
	);
	// getting a Zend_Cache_Core object
	$cache = Zend_Cache::factory('Core', 'File', $frontendOptions, $backendOptions);
	return $cache;
}
1188 | |||
/**
 * Print a debug line and push it out immediately.
 *
 * Does nothing unless the global $debug_mode is truthy.
 *
 * @param string $msg Message to print (prefixed with "* ").
 * @return void
 */
function debug($msg) {
	global $debug_mode;
	if (!$debug_mode) {
		return;
	}
	echo '* ',$msg,"\n";
	// ob_flush() raises a notice when no output buffer is active.
	if (ob_get_level() > 0) {
		ob_flush();
	}
	flush();
}
diff --git a/inc/3rdparty/makefulltextfeedHelpers.php b/inc/3rdparty/makefulltextfeedHelpers.php new file mode 100755 index 00000000..1c11b8f6 --- /dev/null +++ b/inc/3rdparty/makefulltextfeedHelpers.php | |||
@@ -0,0 +1,355 @@ | |||
1 | <?php | ||
2 | |||
3 | // Autoloading of classes allows us to include files only when they're | ||
4 | // needed. If we've got a cached copy, for example, only Zend_Cache is loaded. | ||
/**
 * Autoloader for the bundled third-party libraries.
 *
 * Looks the class name up in a fixed class-to-file table and, when an
 * entry exists, requires that file from the libraries/ directory that
 * sits next to this script.
 *
 * @param string $class_name Name of the class being resolved.
 * @return bool True when a mapped file was required, false when the
 *              class is not listed in the table.
 */
function autoload($class_name) {
	// Class name => file path relative to the libraries directory.
	static $mapping = array(
		// FeedCreator for RSS/Atom creation
		'FeedWriter' => 'feedwriter/FeedWriter.php',
		'FeedItem' => 'feedwriter/FeedItem.php',
		// ContentExtractor and Readability for identifying and extracting content from URLs
		'ContentExtractor' => 'content-extractor/ContentExtractor.php',
		'SiteConfig' => 'content-extractor/SiteConfig.php',
		'Readability' => 'readability/Readability.php',
		// Humble HTTP Agent to allow parallel requests and response caching
		'HumbleHttpAgent' => 'humble-http-agent/HumbleHttpAgent.php',
		'SimplePie_HumbleHttpAgent' => 'humble-http-agent/SimplePie_HumbleHttpAgent.php',
		'CookieJar' => 'humble-http-agent/CookieJar.php',
		// Zend Cache to improve performance (cache results)
		'Zend_Cache' => 'Zend/Cache.php',
		// Language detect
		'Text_LanguageDetect' => 'language-detect/LanguageDetect.php',
		// HTML5 Lib
		'HTML5_Parser' => 'html5/Parser.php',
		// htmLawed - used if XSS filter is enabled (xss_filter)
		'htmLawed' => 'htmLawed/htmLawed.php'
	);
	// Computed on the first call and reused afterwards.
	static $library_root = null;
	if ($library_root === null) {
		$library_root = dirname(__FILE__).'/libraries/';
	}
	if (!isset($mapping[$class_name])) {
		return false;
	}
	debug("** Loading class $class_name ({$mapping[$class_name]})");
	require $library_root.$mapping[$class_name];
	return true;
}
37 | spl_autoload_register('autoload'); | ||
38 | require dirname(__FILE__).'/libraries/simplepie/autoloader.php'; | ||
39 | |||
40 | |||
/**
 * Minimal feed-like wrapper around a single URL.
 *
 * Exposes the feed accessors used by the caller (title, description,
 * link, language, image, items) and always reports exactly one item.
 */
class DummySingleItemFeed {
	/** @var DummySingleItem The only item this feed contains. */
	public $item;

	/**
	 * @param string $url URL the single item points at.
	 */
	public function __construct($url) {
		$this->item = new DummySingleItem($url);
	}

	/** @return string Always an empty title. */
	public function get_title() {
		return '';
	}

	/** @return string Fixed description naming the item's URL. */
	public function get_description() {
		return 'Content extracted from '.$this->item->url;
	}

	/** @return string The item's URL. */
	public function get_link() {
		return $this->item->url;
	}

	/** @return bool Always false (no language information). */
	public function get_language() {
		return false;
	}

	/** @return bool Always false (no feed image). */
	public function get_image_url() {
		return false;
	}

	/**
	 * The arguments are accepted for signature compatibility and ignored.
	 *
	 * @return array One-element list holding the single item.
	 */
	public function get_items($start=0, $max=1) {
		return array($this->item);
	}
}
/**
 * Minimal feed-item stand-in that only knows its own URL.
 *
 * Every accessor other than get_permalink() returns a fixed "nothing
 * available" value (null, false or an empty string).
 */
class DummySingleItem {
	/** @var string URL this item represents. */
	public $url;

	/**
	 * @param string $url URL this item represents.
	 */
	public function __construct($url) {
		$this->url = $url;
	}

	/** @return string The item's URL. */
	public function get_permalink() {
		return $this->url;
	}

	/** @return null No title available. */
	public function get_title() {
		return null;
	}

	/** @return bool Always false (no date available). */
	public function get_date($format='') {
		return false;
	}

	/** @return null No author available. */
	public function get_author($key=0) {
		return null;
	}

	/** @return null No authors available. */
	public function get_authors() {
		return null;
	}

	/** @return string Always an empty description. */
	public function get_description() {
		return '';
	}

	/** @return null No enclosure available. */
	public function get_enclosure($key=0, $prefer=null) {
		return null;
	}

	/** @return null No enclosures available. */
	public function get_enclosures() {
		return null;
	}

	/** @return null No categories available. */
	public function get_categories() {
		return null;
	}
}
64 | |||
65 | /////////////////////////////// | ||
66 | // HELPER FUNCTIONS | ||
67 | /////////////////////////////// | ||
68 | |||
/**
 * Decide whether a URL may be processed, based on the configured lists.
 *
 * When $options->allowed_urls is non-empty it acts as a whitelist and
 * the blocklist is ignored; otherwise $options->blocked_urls acts as a
 * blacklist. Matching is a case-insensitive substring test.
 *
 * @param string $url URL to check.
 * @return bool True when the URL is permitted.
 */
function url_allowed($url) {
	global $options;
	if (!empty($options->allowed_urls)) {
		foreach ($options->allowed_urls as $allowurl) {
			$fragment = (string) $allowurl;
			// An empty entry would match every URL, so it is ignored.
			if ($fragment !== '' && stripos($url, $fragment) !== false) {
				return true;
			}
		}
		return false;
	}
	// A missing or empty blocklist simply blocks nothing.
	if (!empty($options->blocked_urls)) {
		foreach ($options->blocked_urls as $blockurl) {
			$fragment = (string) $blockurl;
			if ($fragment !== '' && stripos($url, $fragment) !== false) {
				return false;
			}
		}
	}
	return true;
}
89 | |||
90 | ////////////////////////////////////////////// | ||
91 | // Convert $html to UTF8 | ||
92 | // (uses HTTP headers and HTML to find encoding) | ||
93 | // adapted from http://stackoverflow.com/questions/910793/php-detect-encoding-and-make-everything-utf-8 | ||
94 | ////////////////////////////////////////////// | ||
/**
 * Convert an HTML document to UTF-8.
 *
 * The source encoding is taken from the last Content-Type header found
 * in $header. When that yields nothing usable, the first 50000 bytes of
 * $html are searched (XML declaration first, then <meta> tags).
 *
 * @param string            $html   Raw document body.
 * @param string|array|null $header HTTP response headers; an array is joined with newlines.
 * @return string The document, converted when a non-UTF-8 encoding was detected.
 */
function convert_to_utf8($html, $header=null)
{
	if (!$html && !$header) {
		return $html;
	}
	$encoding = null;
	if (is_array($header)) $header = implode("\n", $header);
	if (!$header || !preg_match_all('/^Content-Type:\s+([^;]+)(?:;\s*charset=["\']?([^;"\'\n]*))?/im', $header, $match, PREG_SET_ORDER)) {
		// error parsing the response
		debug('Could not find Content-Type header in HTTP response');
	} else {
		$match = end($match); // get last matched element (in case of redirects)
		if (isset($match[2])) $encoding = trim($match[2], "\"' \r\n\0\x0B\t");
	}
	// TODO: check to see if encoding is supported (can we convert it?)
	// If it's not, result will be empty string.
	// Some sites send an invalid charset such as 'none'
	// (problem URL: http://facta.co.jp/blog/archives/20111026001026.html).
	// Drop it, in any letter case, so it can never reach the converter.
	if ($encoding && strtolower($encoding) == 'none') {
		$encoding = null;
	}
	if (!$encoding) {
		// search for encoding in HTML - only look at the first 50000 characters
		// Why 50000? See, for example, http://www.lemonde.fr/festival-de-cannes/article/2012/05/23/deux-cretes-en-goguette-sur-la-croisette_1705732_766360.html
		// TODO: improve this so it looks at smaller chunks first
		$html_head = substr($html, 0, 50000);
		if (preg_match('/^<\?xml\s+version=(?:"[^"]*"|\'[^\']*\')\s+encoding=("[^"]*"|\'[^\']*\')/s', $html_head, $match)) {
			$encoding = trim($match[1], '"\'');
		} elseif (preg_match('/<meta\s+http-equiv=["\']?Content-Type["\']? content=["\'][^;]+;\s*charset=["\']?([^;"\'>]+)/i', $html_head, $match)) {
			$encoding = trim($match[1]);
		} elseif (preg_match_all('/<meta\s+([^>]+)>/i', $html_head, $match)) {
			foreach ($match[1] as $_test) {
				if (preg_match('/charset=["\']?([^"\']+)/i', $_test, $_m)) {
					$encoding = trim($_m[1]);
					break;
				}
			}
		}
	}
	// trim is important here!
	if (isset($encoding)) $encoding = trim($encoding);

	if ($encoding) {
		$fix_smart_quotes = (strtolower($encoding) == 'iso-8859-1');
	} else {
		// With no declared encoding the document is treated as UTF-8 below.
		// Bytes 0x82-0x9F also occur inside valid UTF-8 sequences, so the
		// byte-level replacement is only safe when the text is not valid UTF-8.
		$fix_smart_quotes = (preg_match('//u', $html) !== 1);
	}
	if ($fix_smart_quotes) {
		// Replace MS Word smart quotes and related single-byte characters.
		// The replacements are ASCII-only HTML entities so that the charset
		// conversion further down cannot re-encode them.
		$trans = array(
			chr(130) => '&sbquo;',  // Single Low-9 Quotation Mark
			chr(131) => '&fnof;',   // Latin Small Letter F With Hook
			chr(132) => '&bdquo;',  // Double Low-9 Quotation Mark
			chr(133) => '&hellip;', // Horizontal Ellipsis
			chr(134) => '&dagger;', // Dagger
			chr(135) => '&Dagger;', // Double Dagger
			chr(136) => '&circ;',   // Modifier Letter Circumflex Accent
			chr(137) => '&permil;', // Per Mille Sign
			chr(138) => '&Scaron;', // Latin Capital Letter S With Caron
			chr(139) => '&lsaquo;', // Single Left-Pointing Angle Quotation Mark
			chr(140) => '&OElig;',  // Latin Capital Ligature OE
			chr(145) => '&lsquo;',  // Left Single Quotation Mark
			chr(146) => '&rsquo;',  // Right Single Quotation Mark
			chr(147) => '&ldquo;',  // Left Double Quotation Mark
			chr(148) => '&rdquo;',  // Right Double Quotation Mark
			chr(149) => '&bull;',   // Bullet
			chr(150) => '&ndash;',  // En Dash
			chr(151) => '&mdash;',  // Em Dash
			chr(152) => '&tilde;',  // Small Tilde
			chr(153) => '&trade;',  // Trade Mark Sign
			chr(154) => '&scaron;', // Latin Small Letter S With Caron
			chr(155) => '&rsaquo;', // Single Right-Pointing Angle Quotation Mark
			chr(156) => '&oelig;',  // Latin Small Ligature OE
			chr(159) => '&Yuml;',   // Latin Capital Letter Y With Diaeresis
		);
		$html = strtr($html, $trans);
	}

	if (!$encoding) {
		debug('No character encoding found, so treating as UTF-8');
		$encoding = 'utf-8';
	} else {
		debug('Character encoding: '.$encoding);
		if (strtolower($encoding) != 'utf-8') {
			debug('Converting to UTF-8');
			$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
		}
	}
	return $html;
}
181 | |||
/**
 * Rewrite the href of every <a> and the src of every <img> inside
 * $elem (and on $elem itself when it is one of those tags) so they are
 * absolute with respect to $base.
 *
 * @param string     $base Base URL to resolve against.
 * @param DOMElement $elem Element whose subtree is processed in place.
 * @return void
 */
function makeAbsolute($base, $elem) {
	$base = new SimplePie_IRI($base);
	// remove '//' in URL path (used to prevent URLs from resolving properly)
	// TODO: check if this is still the case
	if (isset($base->path)) {
		$base->path = preg_replace('!//+!', '/', $base->path);
	}
	$url_attributes = array('a'=>'href', 'img'=>'src');
	foreach ($url_attributes as $tag => $attr) {
		$nodes = $elem->getElementsByTagName($tag);
		// Walk the node list from last to first.
		$index = $nodes->length;
		while ($index-- > 0) {
			makeAbsoluteAttr($base, $nodes->item($index), $attr);
		}
		// getElementsByTagName() only returns descendants, so handle the root too.
		if (strtolower($elem->tagName) == $tag) {
			makeAbsoluteAttr($base, $elem, $attr);
		}
	}
}
/**
 * Make one URL-carrying attribute of an element absolute.
 *
 * Nothing happens when the attribute is missing, when its value already
 * starts with http:// or https://, or when resolution fails.
 *
 * @param SimplePie_IRI|string $base Base to resolve against (passed to SimplePie_IRI::absolutize()).
 * @param DOMElement           $e    Element to update in place.
 * @param string               $attr Attribute name, e.g. 'href' or 'src'.
 * @return void
 */
function makeAbsoluteAttr($base, $e, $attr) {
	if (!$e->hasAttribute($attr)) {
		return;
	}
	// Trim leading and trailing white space. I don't really like this but
	// unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" />
	$url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
	$url = str_replace(' ', '%20', $url);
	// Anchored on purpose: a relative URL that merely contains "http://"
	// later on (e.g. in its query string) still has to be resolved.
	if (preg_match('!^https?://!i', $url)) {
		return;
	}
	if ($absolute = SimplePie_IRI::absolutize($base, $url)) {
		$e->setAttribute($attr, $absolute);
	}
}
/**
 * Resolve a URL string against a base URL.
 *
 * @param string $base Base URL.
 * @param string $url  URL to resolve; returned untouched when it already
 *                     starts with http:// or https://.
 * @return string|false The absolute URL, or false when resolution fails.
 */
function makeAbsoluteStr($base, $url) {
	$base = new SimplePie_IRI($base);
	// remove '//' in URL path (causes URLs not to resolve properly)
	if (isset($base->path)) {
		$base->path = preg_replace('!//+!', '/', $base->path);
	}
	// already absolute
	if (preg_match('!^https?://!i', $url)) {
		return $url;
	}
	$resolved = SimplePie_IRI::absolutize($base, $url);
	return $resolved ? $resolved : false;
}
224 | // returns single page response, or false if not found | ||
/**
 * Look for a "single page" version of an article and fetch it.
 *
 * The site config built for $url supplies XPath expressions, either
 * aimed at the page HTML (single_page_link) or at the feed item
 * description (single_page_link_in_feed). The first expression that
 * yields a URL wins.
 *
 * @param object $item Feed item; only get_description() is called on it.
 * @param string $html HTML of the page already fetched for $url.
 * @param string $url  URL of that page.
 * @return array|false The HTTP response for the single-page URL, or false
 *                     when no usable link was found or the fetch failed.
 */
function getSinglePage($item, $html, $url) {
	global $http, $extractor;
	debug('Looking for site config files to see if single page link exists');
	$site_config = $extractor->buildSiteConfig($url, $html);
	if (!empty($site_config->single_page_link)) {
		$link_patterns = $site_config->single_page_link;
	} elseif (!empty($site_config->single_page_link_in_feed)) {
		// single page link xpath is targeted at feed,
		// so replace the HTML with the feed item description
		$link_patterns = $site_config->single_page_link_in_feed;
		$html = $item->get_description();
	} else {
		return false;
	}

	// Build DOM tree from HTML
	$readability = new Readability($html, $url);
	$xpath = new DOMXPath($readability->dom);

	// Loop through the single page link xpath expressions
	$single_page_url = null;
	foreach ($link_patterns as $pattern) {
		$result = @$xpath->evaluate($pattern, $readability->dom);
		if (is_string($result)) {
			$single_page_url = trim($result);
			break;
		}
		if (!($result instanceof DOMNodeList) || !($result->length > 0)) {
			continue;
		}
		foreach ($result as $node) {
			if ($node instanceof DOMElement && $node->hasAttribute('href')) {
				$single_page_url = $node->getAttribute('href');
				break 2;
			}
			if ($node instanceof DOMAttr && $node->value) {
				$single_page_url = $node->value;
				break 2;
			}
		}
	}
	if (!isset($single_page_url)) {
		return false;
	}

	// Resolve against $url and make sure it's not the page we already have
	$single_page_url = makeAbsoluteStr($url, $single_page_url);
	if (!$single_page_url || $single_page_url == $url) {
		return false;
	}

	// Fetch it with the referer temporarily pointing at the target itself
	$previous_referer = $http->referer;
	$http->referer = $single_page_url;
	$response = $http->get($single_page_url, true);
	$http->referer = $previous_referer;
	if ($response && $response['status_code'] < 300) {
		return $response;
	}
	return false;
}
278 | |||
279 | // based on content-type http header, decide what to do | ||
280 | // param: HTTP headers string | ||
281 | // return: array with keys: 'mime', 'type', 'subtype', 'action', 'name' | ||
282 | // e.g. array('mime'=>'image/jpeg', 'type'=>'image', 'subtype'=>'jpeg', 'action'=>'link', 'name'=>'Image') | ||
/**
 * Work out what to do with a response based on its Content-Type header.
 *
 * @param string $headers Raw HTTP headers.
 * @return array Empty when no Content-Type header is found; otherwise
 *               'mime', 'type' and 'subtype' (all lower-cased), plus
 *               'action' and 'name' when $options->content_type_exc has
 *               an entry for the full mime type or, failing that, the type.
 */
function get_mime_action_info($headers) {
	global $options;
	if (!preg_match('!^Content-Type:\s*(([-\w]+)/([-\w\+]+))!im', $headers, $parts)) {
		return array();
	}
	// $parts[1] = full mime type (image/jpeg), [2] = type (image), [3] = subtype (jpeg)
	$info = array(
		'mime' => strtolower(trim($parts[1])),
		'type' => strtolower(trim($parts[2])),
		'subtype' => strtolower(trim($parts[3])),
	);
	// The full mime type takes precedence over the bare type.
	foreach (array('mime', 'type') as $key) {
		$candidate = $info[$key];
		if (isset($options->content_type_exc[$candidate])) {
			$rule = $options->content_type_exc[$candidate];
			$info['action'] = $rule['action'];
			$info['name'] = $rule['name'];
			break;
		}
	}
	return $info;
}
305 | |||
/**
 * Strip Google Analytics (utm_*) parameters from a URL.
 *
 * The remaining query string stays well formed: the parameter that
 * follows a removed leading one keeps the '?', and a '?' or '&' left
 * dangling at the end by the removal is trimmed.
 *
 * regex adapted from http://navitronic.co.uk/2010/12/removing-google-analytics-cruft-from-urls/
 * https://gist.github.com/758177
 *
 * @param string $url URL to clean.
 * @return string The URL without utm_* parameters.
 */
function remove_url_cruft($url) {
	// Remove each "utm_x=value" together with the separator that follows it,
	// keeping the '?' or '&' in front for whatever parameter comes next.
	$cleaned = preg_replace('/(?<=[?&])utm_[a-z]+=[^&]+(?:&|$)/', '', $url, -1, $removed);
	if (!$removed) {
		return $url;
	}
	return rtrim($cleaned, '?&');
}
312 | |||
/**
 * Fill the {url} and {effective-url} placeholders in a template string.
 *
 * {url} becomes the HTML-escaped permalink of the global $item and
 * {effective-url} the HTML-escaped global $effective_url.
 *
 * @param string $string Template text.
 * @return string The text with both placeholders replaced; an empty
 *                input is returned as is.
 */
function make_substitutions($string) {
	if ($string == '') {
		return $string;
	}
	global $item, $effective_url;
	$placeholders = array('{url}', '{effective-url}');
	$values = array(
		htmlspecialchars($item->get_permalink()),
		htmlspecialchars($effective_url),
	);
	// str_replace() applies the pairs one after another, in order.
	return str_replace($placeholders, $values, $string);
}
320 | |||
/**
 * Return the shared Zend file cache, creating it on first use.
 *
 * The cache directory is $options->cache_dir.'/rss-with-key/' when the
 * global $valid_key is truthy and $options->cache_dir.'/rss/' otherwise.
 *
 * @return object The cache object built by Zend_Cache::factory().
 */
function get_cache() {
	global $options, $valid_key;
	static $cache = null;
	if ($cache !== null) {
		return $cache;
	}
	$subdirectory = $valid_key ? '/rss-with-key/' : '/rss/';
	$frontendOptions = array(
		'lifetime' => 10*60, // cache lifetime of 10 minutes
		'automatic_serialization' => false,
		'write_control' => false,
		'automatic_cleaning_factor' => $options->cache_cleanup,
		'ignore_user_abort' => false
	);
	$backendOptions = array(
		'cache_dir' => $options->cache_dir.$subdirectory, // directory where to put the cache files
		'file_locking' => false,
		'read_control' => true,
		'read_control_type' => 'strlen',
		'hashed_directory_level' => $options->cache_directory_level,
		'hashed_directory_perm' => 0777,
		'cache_file_perm' => 0664,
		'file_name_prefix' => 'ff'
	);
	// getting a Zend_Cache_Core object
	$cache = Zend_Cache::factory('Core', 'File', $frontendOptions, $backendOptions);
	return $cache;
}
347 | |||
/**
 * Print a debug line and push it out immediately.
 *
 * Does nothing unless the global $debug_mode is truthy.
 *
 * @param string $msg Message to print (prefixed with "* ").
 * @return void
 */
function debug($msg) {
	global $debug_mode;
	if (!$debug_mode) {
		return;
	}
	echo '* ',$msg,"\n";
	// ob_flush() raises a notice when no output buffer is active.
	if (ob_get_level() > 0) {
		ob_flush();
	}
	flush();
}