/**
 * poche, a read it later open source system
 *
 * @author Nicolas Lœuillet <support@inthepoche.com>
 * @license http://www.wtfpl.net/ see COPYING file
 */
/**
 * Builds the object from a base64-encoded URL.
 *
 * The argument is decoded with base64_decode() and the result is stored
 * in $this->url; no validation happens here (see isCorrect()).
 *
 * @param string $url base64-encoded URL
 */
public function __construct($url)
{
    $this->url = base64_decode($url);
}
/**
 * Returns the URL currently held by this object.
 *
 * @return string
 */
public function getUrl()
{
    return $this->url;
}
/**
 * Replaces the URL held by this object.
 *
 * The value is stored as given; it is neither decoded nor validated.
 *
 * @param string $url new URL
 */
public function setUrl($url)
{
    $this->url = $url;
}
/**
 * Checks that the stored URL has the overall shape "scheme://host[:port][rest]".
 *
 * The host part must start with at least one letter, digit, hyphen or dot.
 * Digits are accepted so that hosts such as "123.example.com" or plain
 * IPv4 addresses are not rejected.
 *
 * @return int|false result of preg_match(): 1 when the URL matches,
 *                   0 when it does not, false if the regex engine fails
 */
public function isCorrect()
{
    $pattern = '|^(.*:)//([a-z0-9\-.]+)(:[0-9]+)?(.*)$|i';

    return preg_match($pattern, $this->url);
}
/**
 * Normalises the stored URL and strips tracking suffixes.
 *
 * The URL is trimmed and HTML entities are decoded, then everything from
 * the first occurrence of each marker below is cut off, in this order:
 * "&utm_source=", "?utm_source=", "#xtor=RSS-". The cleaned value is
 * written back to $this->url.
 */
public function clean()
{
    $url = html_entity_decode(trim($this->url));

    $markers = array('&utm_source=', '?utm_source=', '#xtor=RSS-');
    foreach ($markers as $marker) {
        $position = strpos($url, $marker);

        // strpos() returns false when the marker is absent; passing that to
        // substr() as a length would truncate the URL to an empty string.
        if ($position !== false) {
            $url = substr($url, 0, $position);
        }
    }

    $this->url = $url;
}
/**
 * Downloads the page behind the stored URL and extracts its article.
 *
 * Steps:
 *  - the URL is checked with isCorrect(); an invalid URL is logged through
 *    Tools::logm() and the method gives up;
 *  - the page is fetched with Tools::getFile() and converted with
 *    Encoding::toUTF8();
 *  - if nothing came back and the URL does not start with http:// or
 *    https://, the URL is prefixed with "http://" and fetched again;
 *  - when the tidy extension is available the markup is repaired first;
 *  - Readability extracts the title and the content.
 *
 * NOTE(review): the tidy document used to be parsed and then discarded;
 * the repaired markup is now passed on to Readability. Confirm this is
 * the intended behaviour.
 *
 * @return array|false array with the keys 'title' and 'content' on
 *                     success, false when the URL is invalid, nothing
 *                     could be downloaded or Readability found no article
 */
public function fetchContent()
{
    if (!$this->isCorrect()) {
        #$msg->add('e', _('error during url preparation : the link is not valid'));
        Tools::logm($this->getUrl() . ' is not a valid url');

        return false;
    }

    $html = Encoding::toUTF8(Tools::getFile($this->getUrl()));

    # if Tools::getFile() returned nothing and the URL has no http(s) scheme, try again with "http://" prepended
    if (!preg_match('!^https?://!i', $this->getUrl()) && (!isset($html) || strlen($html) <= 0)) {
        $this->setUrl('http://' . $this->getUrl());
        $html = Encoding::toUTF8(Tools::getFile($this->getUrl()));
    }

    if (!isset($html) || strlen($html) <= 0) {
        return false;
    }

    if (function_exists('tidy_parse_string')) {
        $tidy = tidy_parse_string($html, array(), 'UTF8');

        // tidy_parse_string() returns false on failure; keep the raw markup then.
        if ($tidy !== false) {
            $tidy->cleanRepair();
            $repaired = $tidy->value;
            if (is_string($repaired) && strlen($repaired) > 0) {
                $html = $repaired;
            }
        }
    }

    $readability = new Readability($html, $this->getUrl());
    $readability->convertLinksToFootnotes = CONVERT_LINKS_FOOTNOTES;
    $readability->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS;

    if (!$readability->init()) {
        return false;
    }

    $parameters = array();
    $parameters['title'] = $readability->articleTitle->innerHTML;
    $parameters['content'] = $readability->articleContent->innerHTML;

    return $parameters;
}