3 * poche, a read it later open source system
6 * @author Nicolas Lœuillet <support@inthepoche.com>
8 * @license http://www.wtfpl.net/ see COPYING file
/**
 * Build the object from a base64-encoded URL.
 *
 * The argument is decoded with base64_decode() in its default (non-strict)
 * mode, where characters outside the base64 alphabet are silently discarded,
 * and the decoded value is stored in $this->url. Nothing is validated here;
 * isCorrect() checks the stored value with FILTER_VALIDATE_URL.
 *
 * @param string $url base64-encoded URL
 */
public function __construct($url)
{
    $this->url = base64_decode($url);
}
// Accessor for the URL handled by this object; fetchContent() passes its result
// to Tools::getFile() and concatenates it into strings.
// NOTE(review): presumably returns $this->url (the value the constructor stores) - confirm against the body.
20 public function getUrl() {
// Mutator taking the replacement URL as $url; fetchContent() uses it to retry with an 'http://' prefix.
// NOTE(review): presumably assigns $url to $this->url - confirm against the body.
24 public function setUrl($url) {
/**
 * Tell whether the stored URL passes PHP's built-in URL validation.
 *
 * The check is delegated to filter_var() with FILTER_VALIDATE_URL, which
 * yields false when the value is rejected.
 *
 * @return bool true when $this->url is accepted by the filter, false otherwise
 */
public function isCorrect()
{
    $validated = filter_var($this->url, FILTER_VALIDATE_URL);

    return $validated !== false;
}
// Works on a local copy of $this->url and cuts it at the first occurrence of each of
// three markers: '&utm_source=', '?utm_source=' and '#xtor=RSS-' (these look like
// campaign-tracking suffixes).
// NOTE(review): strpos() returns false when a marker is absent; each substr() call is
// presumably guarded by a check on $stuff, and the cleaned value is presumably written
// back to $this->url at the end - confirm both against the full method.
33 public function clean()
// Trim surrounding whitespace first, then turn HTML entities back into plain characters.
35 $url = html_entity_decode(trim($this->url
));
// Keep only the part that precedes '&utm_source='.
37 $stuff = strpos($url,'&utm_source=');
39 $url = substr($url, 0, $stuff);
// Same cut for the variant where utm_source is the first query parameter.
40 $stuff = strpos($url,'?utm_source=');
42 $url = substr($url, 0, $stuff);
// Same cut for the '#xtor=RSS-' fragment.
43 $stuff = strpos($url,'#xtor=RSS-');
45 $url = substr($url, 0, $stuff);
// Retrieve the document behind the stored URL and extract its article with Readability.
// Visible steps: validate the URL with isCorrect(), fetch it through Tools::getFile() and
// pass the result through Encoding::toUTF8(), inspect the markup with tidy when that
// extension is available, then fill $parameters['title'] and $parameters['content'] from
// the Readability result.
// NOTE(review): the return statements are not visible here; presumably $parameters is
// returned on success and a failure value otherwise - confirm against the full method.
50 public function fetchContent()
// Everything below only runs for a URL accepted by FILTER_VALIDATE_URL.
52 if ($this->isCorrect()) {
54 $html = Encoding
::toUTF8(Tools
::getFile($this->getUrl()));
56 # If the URL has no http:// or https:// scheme and nothing was retrieved, retry with an explicit http:// prefix
57 if (!preg_match('!^https?://!i', $this->getUrl()) && (!isset($html) || strlen($html) <= 0)) {
58 $this->setUrl('http://' . $this->getUrl());
59 $html = Encoding
::toUTF8(Tools
::getFile($this->getUrl()));
// tidy is an optional extension, so only use it when its functions exist.
62 if (function_exists('tidy_parse_string')) {
// Parse with an empty configuration array and the 'UTF8' encoding.
63 $tidy = tidy_parse_string($html, array(), 'UTF8');
66 // Warning: tidy might fail, so make sure there is still some content
67 $body = $tidy->body();
69 //hasChildren does not seem to work, just check the string
70 //returned (and do not forget to clean the white spaces)
71 if (preg_replace('/\s+/', '', $body->value
) !== "<body></body>") {
76 $parameters = array();
// Only hand the markup to Readability when some HTML was actually obtained.
77 if (isset($html) and strlen($html) > 0)
79 $readability = new Readability($html, $this->getUrl());
// Both Readability options take their value from constants that are not defined in this block.
80 $readability->convertLinksToFootnotes
= CONVERT_LINKS_FOOTNOTES
;
81 $readability->revertForcedParagraphElements
= REVERT_FORCED_PARAGRAPH_ELEMENTS
;
// Title and content are read only after init() has been called and tested.
83 if($readability->init())
85 $content = $readability->articleContent
->innerHTML
;
// Fall back to the translated 'Untitled' label when the extracted title is empty.
86 $parameters['title'] = ($readability->articleTitle
->innerHTML
!= '' ? $readability->articleTitle
->innerHTML
: _('Untitled'));
87 $parameters['content'] = $content;
// Log the rejected URL.
// NOTE(review): presumably this is the else-branch of the isCorrect() check above - confirm.
94 #$msg->add('e', _('error during url preparation : the link is not valid'));
95 Tools
::logm($this->getUrl() . ' is not a valid url');