From eb1af592194e225bf887e4893e697f0ab8dd9a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 4 Aug 2013 20:58:31 +0200 Subject: refactoring --- inc/poche/Url.class.php | 94 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 inc/poche/Url.class.php (limited to 'inc/poche/Url.class.php') diff --git a/inc/poche/Url.class.php b/inc/poche/Url.class.php new file mode 100644 index 00000000..f4a8f99e --- /dev/null +++ b/inc/poche/Url.class.php @@ -0,0 +1,94 @@ + + * @copyright 2013 + * @license http://www.wtfpl.net/ see COPYING file + */ + +class Url +{ + public $url; + + function __construct($url) + { + $this->url = base64_decode($url); + } + + public function getUrl() { + return $this->url; + } + + public function setUrl($url) { + $this->url = $url; + } + + public function isCorrect() + { + $pattern = '|^http(s)?://[a-z0-9-]+(.[a-z0-9-]+)*(:[0-9]+)?(/.*)?$|i'; + + return preg_match($pattern, $this->url); + } + + public function clean() + { + $url = html_entity_decode(trim($this->url)); + + $stuff = strpos($url,'&utm_source='); + if ($stuff !== FALSE) + $url = substr($url, 0, $stuff); + $stuff = strpos($url,'?utm_source='); + if ($stuff !== FALSE) + $url = substr($url, 0, $stuff); + $stuff = strpos($url,'#xtor=RSS-'); + if ($stuff !== FALSE) + $url = substr($url, 0, $stuff); + + $this->url = $url; + } + + public function fetchContent() + { + if ($this->isCorrect()) { + $this->clean(); + $html = Encoding::toUTF8(Tools::getFile($this->getUrl())); + + # if Tools::getFile() if not able to retrieve HTTPS content, try the same URL with HTTP protocol + if (!preg_match('!^https?://!i', $this->getUrl()) && (!isset($html) || strlen($html) <= 0)) { + $this->setUrl('http://' . $this->getUrl()); + $html = Encoding::toUTF8(Tools::getFile($this->getUrl())); + } + + if (function_exists('tidy_parse_string')) { + $tidy = tidy_parse_string($html, array(), 'UTF8'); + $tidy->cleanRepair(); + $html = $tidy->value; + } + + $parameters = array(); + if (isset($html) and strlen($html) > 0) + { + $readability = new Readability($html, $this->getUrl()); + $readability->convertLinksToFootnotes = CONVERT_LINKS_FOOTNOTES; + $readability->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS; + + if($readability->init()) + { + $content = $readability->articleContent->innerHTML; + $parameters['title'] = $readability->articleTitle->innerHTML; + $parameters['content'] = $content; + + return $parameters; + } + } + } + else { + #$msg->add('e', _('error during url preparation : the link is not valid')); + Tools::logm($this->getUrl() . ' is not a valid url'); + } + + return FALSE; + } +} \ No newline at end of file -- cgit v1.2.3